You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2020/08/03 16:08:34 UTC
[arrow] branch master updated: ARROW-9582: [Rust] Implement memory size methods
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new f51564b ARROW-9582: [Rust] Implement memory size methods
f51564b is described below
commit f51564b265b645cdbd82f23f94f130450f3c2755
Author: Mahmut Bulut <ve...@gmail.com>
AuthorDate: Mon Aug 3 10:06:58 2020 -0600
ARROW-9582: [Rust] Implement memory size methods
This PR is a slightly extended version of the PR https://github.com/apache/arrow/pull/7853.
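* `memory_used` (implemented in this commit as `get_buffer_memory_size`): calculates only the size of the internally held data buffers.
* `memory_capacity` (implemented in this commit as `get_array_memory_size`): calculates the total physical memory size, including the vtable, pointed-to data, and other per-array overhead. (I am not sure about the name.)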
cc @andygrove @paddyhoran @nevi-me
Closes #7874 from vertexclique/vcq/ARROW-9582-implement-memory-size
Authored-by: Mahmut Bulut <ve...@gmail.com>
Signed-off-by: Andy Grove <an...@gmail.com>
---
rust/arrow/src/array/array.rs | 106 ++++++++++++++++++++++++++++++++++++++++++
rust/arrow/src/array/data.rs | 40 ++++++++++++++++
rust/arrow/src/array/null.rs | 26 +++++++++--
rust/arrow/src/array/union.rs | 30 ++++++++++++
rust/arrow/src/bitmap.rs | 11 +++++
5 files changed, 209 insertions(+), 4 deletions(-)
diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs
index 79d3353..111dea9 100644
--- a/rust/arrow/src/array/array.rs
+++ b/rust/arrow/src/array/array.rs
@@ -213,6 +213,12 @@ pub trait Array: fmt::Debug + Send + Sync + ArrayEqual + JsonEqual {
fn null_count(&self) -> usize {
self.data().null_count()
}
+
+ /// Returns the total number of bytes of memory occupied by the buffers owned by this array.
+ fn get_buffer_memory_size(&self) -> usize;
+
+ /// Returns the total number of bytes of memory occupied physically by this array.
+ fn get_array_memory_size(&self) -> usize;
}
/// A reference-counted reference to a generic `Array`.
@@ -443,6 +449,16 @@ impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
fn data_ref(&self) -> &ArrayDataRef {
&self.data
}
+
+ /// Returns the total number of bytes of memory occupied by the buffers owned by this [PrimitiveArray].
+ fn get_buffer_memory_size(&self) -> usize {
+ self.data.get_buffer_memory_size()
+ }
+
+ /// Returns the total number of bytes of memory occupied physically by this [PrimitiveArray].
+ fn get_array_memory_size(&self) -> usize {
+ self.data.get_array_memory_size() + mem::size_of_val(self)
+ }
}
/// Implementation for primitive arrays with numeric types.
@@ -1168,6 +1184,16 @@ impl Array for ListArray {
fn data_ref(&self) -> &ArrayDataRef {
&self.data
}
+
+ /// Returns the total number of bytes of memory occupied by the buffers owned by this [ListArray].
+ fn get_buffer_memory_size(&self) -> usize {
+ self.data.get_buffer_memory_size()
+ }
+
+ /// Returns the total number of bytes of memory occupied physically by this [ListArray].
+ fn get_array_memory_size(&self) -> usize {
+ self.data.get_array_memory_size() + mem::size_of_val(self)
+ }
}
impl Array for LargeListArray {
@@ -1182,6 +1208,18 @@ impl Array for LargeListArray {
fn data_ref(&self) -> &ArrayDataRef {
&self.data
}
+
+ /// Returns the number of bytes held in buffers by this [LargeListArray]: the buffer size reported by `self.data` plus the buffer size reported by `self.values()`.
+ fn get_buffer_memory_size(&self) -> usize {
+ self.data.get_buffer_memory_size() + self.values().get_buffer_memory_size() // NOTE(review): ArrayData::get_buffer_memory_size already sums its child data; if values() is backed by that child data it is counted twice here (ListArray adds no values() term) - confirm
+ }
+
+ /// Returns the number of bytes occupied by this [LargeListArray]: the size reported by `self.data`, the size reported by `self.values()`, and the size of the array struct itself.
+ fn get_array_memory_size(&self) -> usize {
+ self.data.get_array_memory_size()
+ + self.values().get_array_memory_size() // NOTE(review): ArrayData::get_array_memory_size already recurses into child data; possible double count if values() is backed by it (ListArray adds no values() term) - confirm
+ + mem::size_of_val(self)
+ }
}
// Helper function for printing potentially long arrays.
@@ -1332,6 +1370,18 @@ impl Array for FixedSizeListArray {
fn data_ref(&self) -> &ArrayDataRef {
&self.data
}
+
+ /// Returns the number of bytes held in buffers by this [FixedSizeListArray]: the buffer size reported by `self.data` plus the buffer size reported by `self.values()`.
+ fn get_buffer_memory_size(&self) -> usize {
+ self.data.get_buffer_memory_size() + self.values().get_buffer_memory_size() // NOTE(review): ArrayData::get_buffer_memory_size already sums its child data; if values() is backed by that child data it is counted twice here (ListArray adds no values() term) - confirm
+ }
+
+ /// Returns the number of bytes occupied by this [FixedSizeListArray]: the size reported by `self.data`, the size reported by `self.values()`, and the size of the array struct itself.
+ fn get_array_memory_size(&self) -> usize {
+ self.data.get_array_memory_size()
+ + self.values().get_array_memory_size() // NOTE(review): ArrayData::get_array_memory_size already recurses into child data; possible double count if values() is backed by it (ListArray adds no values() term) - confirm
+ + mem::size_of_val(self)
+ }
}
impl fmt::Debug for FixedSizeListArray {
@@ -1398,6 +1448,16 @@ macro_rules! make_binary_type {
fn data_ref(&self) -> &ArrayDataRef {
&self.data
}
+
+ /// Returns the total number of bytes of memory occupied by the buffers owned by this [$name].
+ fn get_buffer_memory_size(&self) -> usize {
+ self.data.get_buffer_memory_size()
+ }
+
+ /// Returns the total number of bytes of memory occupied physically by this [$name].
+ fn get_array_memory_size(&self) -> usize {
+ self.data.get_array_memory_size() + mem::size_of_val(self)
+ }
}
};
}
@@ -1953,6 +2013,16 @@ impl Array for FixedSizeBinaryArray {
fn data_ref(&self) -> &ArrayDataRef {
&self.data
}
+
+ /// Returns the total number of bytes of memory occupied by the buffers owned by this [FixedSizeBinaryArray].
+ fn get_buffer_memory_size(&self) -> usize {
+ self.data.get_buffer_memory_size()
+ }
+
+ /// Returns the total number of bytes of memory occupied physically by this [FixedSizeBinaryArray].
+ fn get_array_memory_size(&self) -> usize {
+ self.data.get_array_memory_size() + mem::size_of_val(self)
+ }
}
/// A nested array type where each child (called *field*) is represented by a separate
@@ -2035,6 +2105,16 @@ impl Array for StructArray {
fn len(&self) -> usize {
self.data().len()
}
+
+ /// Returns the total number of bytes of memory occupied by the buffers owned by this [StructArray].
+ fn get_buffer_memory_size(&self) -> usize {
+ self.data.get_buffer_memory_size()
+ }
+
+ /// Returns the total number of bytes of memory occupied physically by this [StructArray].
+ fn get_array_memory_size(&self) -> usize {
+ self.data.get_array_memory_size() + mem::size_of_val(self)
+ }
}
impl From<Vec<(Field, ArrayRef)>> for StructArray {
@@ -2333,6 +2413,18 @@ impl<T: ArrowPrimitiveType> Array for DictionaryArray<T> {
fn data_ref(&self) -> &ArrayDataRef {
&self.data
}
+
+ /// Returns the number of bytes held in buffers by this [DictionaryArray]: the buffer size reported by `self.data` plus the buffer size reported by `self.values()`.
+ fn get_buffer_memory_size(&self) -> usize {
+ self.data.get_buffer_memory_size() + self.values().get_buffer_memory_size() // NOTE(review): ArrayData::get_buffer_memory_size already sums its child data; if values() is backed by that child data it is counted twice here - confirm
+ }
+
+ /// Returns the number of bytes occupied by this [DictionaryArray]: the size reported by `self.data`, the size reported by `self.values()`, and the size of the array struct itself.
+ fn get_array_memory_size(&self) -> usize {
+ self.data.get_array_memory_size()
+ + self.values().get_array_memory_size() // NOTE(review): ArrayData::get_array_memory_size already recurses into child data; possible double count if values() is backed by it - confirm
+ + mem::size_of_val(self)
+ }
}
impl<T: ArrowPrimitiveType> fmt::Debug for DictionaryArray<T> {
@@ -2379,6 +2471,13 @@ mod tests {
assert!(arr.is_valid(i));
assert_eq!(i as i32, arr.value(i));
}
+
+ assert_eq!(64, arr.get_buffer_memory_size());
+ let internals_of_primitive_array = 8 + 72; // RawPtrBox & Arc<ArrayData> combined.
+ assert_eq!(
+ arr.get_buffer_memory_size() + internals_of_primitive_array,
+ arr.get_array_memory_size()
+ );
}
#[test]
@@ -2398,6 +2497,13 @@ mod tests {
assert!(!arr.is_valid(i));
}
}
+
+ assert_eq!(128, arr.get_buffer_memory_size());
+ let internals_of_primitive_array = 8 + 72 + 16; // RawPtrBox & Arc<ArrayData> and its null_bitmap combined.
+ assert_eq!(
+ arr.get_buffer_memory_size() + internals_of_primitive_array,
+ arr.get_array_memory_size()
+ );
}
#[test]
diff --git a/rust/arrow/src/array/data.rs b/rust/arrow/src/array/data.rs
index f8bf5cf..5f6e5dc 100644
--- a/rust/arrow/src/array/data.rs
+++ b/rust/arrow/src/array/data.rs
@@ -18,6 +18,7 @@
//! Contains `ArrayData`, a generic representation of Arrow array data which encapsulates
//! common attributes and operations for Arrow array.
+use std::mem;
use std::sync::Arc;
use crate::bitmap::Bitmap;
@@ -159,6 +160,45 @@ impl ArrayData {
pub fn null_count(&self) -> usize {
self.null_count
}
+
+ /// Returns the total number of bytes of memory occupied by the buffers owned by this [ArrayData].
+ pub fn get_buffer_memory_size(&self) -> usize {
+ let mut size = 0;
+ for buffer in &self.buffers {
+ size += buffer.capacity();
+ }
+ if let Some(bitmap) = &self.null_bitmap {
+ size += bitmap.get_buffer_memory_size()
+ }
+ for child in &self.child_data {
+ size += child.get_buffer_memory_size();
+ }
+ size
+ }
+
+ /// Returns the total number of bytes this [ArrayData] occupies: the struct itself plus the capacity of its buffers, its null bitmap and, recursively, its child data.
+ pub fn get_array_memory_size(&self) -> usize {
+ let mut size = 0;
+ // Start from the struct's own size, minus the three fields (buffers, null_bitmap, child_data) whose contents are accounted for separately below.
+ size += mem::size_of_val(self)
+ - mem::size_of_val(&self.buffers)
+ - mem::size_of_val(&self.null_bitmap)
+ - mem::size_of_val(&self.child_data);
+
+ // Add the contents of those three fields: every buffer, the optional null bitmap, then every child.
+ for buffer in &self.buffers {
+ size += mem::size_of_val(&buffer); // NOTE(review): `buffer` is already a reference produced by iterating `&self.buffers`, so this appears to measure the reference rather than the buffer itself - confirm
+ size += buffer.capacity();
+ }
+ if let Some(bitmap) = &self.null_bitmap {
+ size += bitmap.get_array_memory_size()
+ }
+ for child in &self.child_data {
+ size += child.get_array_memory_size();
+ }
+
+ size
+ }
}
/// Builder for `ArrayData` type
diff --git a/rust/arrow/src/array/null.rs b/rust/arrow/src/array/null.rs
index 14e1dd7..867bd7c 100644
--- a/rust/arrow/src/array/null.rs
+++ b/rust/arrow/src/array/null.rs
@@ -36,6 +36,7 @@
use std::any::Any;
use std::fmt;
+use std::mem;
use crate::array::{Array, ArrayData, ArrayDataRef};
use crate::datatypes::*;
@@ -83,6 +84,16 @@ impl Array for NullArray {
fn null_count(&self) -> usize {
self.data().len()
}
+
+ /// Returns the total number of bytes of memory occupied by the buffers owned by this [NullArray].
+ fn get_buffer_memory_size(&self) -> usize {
+ self.data.get_buffer_memory_size()
+ }
+
+ /// Returns the total number of bytes of memory occupied physically by this [NullArray].
+ fn get_array_memory_size(&self) -> usize {
+ self.data.get_array_memory_size() + mem::size_of_val(self)
+ }
}
impl From<ArrayDataRef> for NullArray {
@@ -112,11 +123,18 @@ mod tests {
#[test]
fn test_null_array() {
- let array1 = NullArray::new(32);
+ let null_arr = NullArray::new(32);
+
+ assert_eq!(null_arr.len(), 32);
+ assert_eq!(null_arr.null_count(), 32);
+ assert_eq!(null_arr.is_valid(0), false);
- assert_eq!(array1.len(), 32);
- assert_eq!(array1.null_count(), 32);
- assert_eq!(array1.is_valid(0), false);
+ assert_eq!(0, null_arr.get_buffer_memory_size());
+ let internals_of_null_array = 64; // Arc<ArrayData>
+ assert_eq!(
+ null_arr.get_buffer_memory_size() + internals_of_null_array,
+ null_arr.get_array_memory_size()
+ );
}
#[test]
diff --git a/rust/arrow/src/array/union.rs b/rust/arrow/src/array/union.rs
index 9e8f450..9bbf64e 100644
--- a/rust/arrow/src/array/union.rs
+++ b/rust/arrow/src/array/union.rs
@@ -86,6 +86,7 @@ use crate::util::bit_util;
use core::fmt;
use std::any::Any;
use std::collections::HashMap;
+use std::mem;
use std::mem::size_of;
/// An Array that can represent slots of varying types
@@ -296,6 +297,25 @@ impl Array for UnionArray {
fn data_ref(&self) -> &ArrayDataRef {
&self.data
}
+
+ /// Returns the total number of bytes of memory occupied by the buffers owned by this [UnionArray].
+ fn get_buffer_memory_size(&self) -> usize {
+ let mut size = self.data.get_buffer_memory_size();
+ for field in &self.boxed_fields {
+ size += field.get_buffer_memory_size();
+ }
+ size
+ }
+
+ /// Returns the total number of bytes of memory occupied physically by this [UnionArray].
+ fn get_array_memory_size(&self) -> usize {
+ let mut size = self.data.get_array_memory_size();
+ size += mem::size_of_val(self) - mem::size_of_val(&self.boxed_fields);
+ for field in &self.boxed_fields {
+ size += field.get_array_memory_size();
+ }
+ size
+ }
}
impl fmt::Debug for UnionArray {
@@ -675,6 +695,16 @@ mod tests {
let value = slot.value(0);
assert_eq!(expected_value, &value);
}
+
+ assert_eq!(
+ 4 * 8 * 4 * mem::size_of::<i32>(),
+ union.get_buffer_memory_size()
+ );
+ let internals_of_union_array = (8 + 72) + (union.boxed_fields.len() * 144); // Arc<ArrayData> & Vec<ArrayRef> combined.
+ assert_eq!(
+ union.get_buffer_memory_size() + internals_of_union_array,
+ union.get_array_memory_size()
+ );
}
#[test]
diff --git a/rust/arrow/src/bitmap.rs b/rust/arrow/src/bitmap.rs
index 06412af..e9060e6 100644
--- a/rust/arrow/src/bitmap.rs
+++ b/rust/arrow/src/bitmap.rs
@@ -21,6 +21,7 @@
use crate::buffer::Buffer;
use crate::error::Result;
use crate::util::bit_util;
+use std::mem;
use std::ops::{BitAnd, BitOr};
@@ -67,6 +68,16 @@ impl Bitmap {
pub fn into_buffer(self) -> Buffer {
self.bits
}
+
+ /// Returns the total number of bytes of memory occupied by the buffers owned by this [Bitmap].
+ pub fn get_buffer_memory_size(&self) -> usize {
+ self.bits.capacity()
+ }
+
+ /// Returns the total number of bytes of memory occupied physically by this [Bitmap].
+ pub fn get_array_memory_size(&self) -> usize {
+ self.bits.capacity() + mem::size_of_val(self)
+ }
}
impl<'a, 'b> BitAnd<&'b Bitmap> for &'a Bitmap {