You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2020/08/03 16:08:34 UTC

[arrow] branch master updated: ARROW-9582: [Rust] Implement memory size methods

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new f51564b  ARROW-9582: [Rust] Implement memory size methods
f51564b is described below

commit f51564b265b645cdbd82f23f94f130450f3c2755
Author: Mahmut Bulut <ve...@gmail.com>
AuthorDate: Mon Aug 3 10:06:58 2020 -0600

    ARROW-9582: [Rust] Implement memory size methods
    
    This PR is a slightly extended version of the PR https://github.com/apache/arrow/pull/7853.
    
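    * `memory_used` (implemented in this commit as `get_buffer_memory_size`): Calculates only the size of the internally held data.
    * `memory_capacity` (implemented in this commit as `get_array_memory_size`): Calculates the total physical memory size, including the vtable, pointed-to data, and so on. (I am not sure about the name)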
    
    cc @andygrove @paddyhoran @nevi-me
    
    Closes #7874 from vertexclique/vcq/ARROW-9582-implement-memory-size
    
    Authored-by: Mahmut Bulut <ve...@gmail.com>
    Signed-off-by: Andy Grove <an...@gmail.com>
---
 rust/arrow/src/array/array.rs | 106 ++++++++++++++++++++++++++++++++++++++++++
 rust/arrow/src/array/data.rs  |  40 ++++++++++++++++
 rust/arrow/src/array/null.rs  |  26 +++++++++--
 rust/arrow/src/array/union.rs |  30 ++++++++++++
 rust/arrow/src/bitmap.rs      |  11 +++++
 5 files changed, 209 insertions(+), 4 deletions(-)

diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs
index 79d3353..111dea9 100644
--- a/rust/arrow/src/array/array.rs
+++ b/rust/arrow/src/array/array.rs
@@ -213,6 +213,12 @@ pub trait Array: fmt::Debug + Send + Sync + ArrayEqual + JsonEqual {
     fn null_count(&self) -> usize {
         self.data().null_count()
     }
+
+    /// Returns the total number of bytes occupied by the buffers owned by this array (required method: every array type supplies its own accounting).
+    fn get_buffer_memory_size(&self) -> usize;
+
+    /// Returns the total number of bytes physically occupied by this array, i.e. more than just its buffers (required method: every array type supplies its own accounting).
+    fn get_array_memory_size(&self) -> usize;
 }
 
 /// A reference-counted reference to a generic `Array`.
@@ -443,6 +449,16 @@ impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
     fn data_ref(&self) -> &ArrayDataRef {
         &self.data
     }
+
+    /// Returns the buffer memory size, in bytes, of this [PrimitiveArray], as reported by its underlying array data.
+    fn get_buffer_memory_size(&self) -> usize {
+        self.data.get_buffer_memory_size()
+    }
+
+    /// Returns the array memory size, in bytes, of this [PrimitiveArray]: the underlying array data's figure plus the size of the array struct itself.
+    fn get_array_memory_size(&self) -> usize {
+        self.data.get_array_memory_size() + mem::size_of_val(self)
+    }
 }
 
 /// Implementation for primitive arrays with numeric types.
@@ -1168,6 +1184,16 @@ impl Array for ListArray {
     fn data_ref(&self) -> &ArrayDataRef {
         &self.data
     }
+
+    /// Returns the buffer memory size, in bytes, of this [ListArray], as reported by its underlying array data (the values array is not added separately).
+    fn get_buffer_memory_size(&self) -> usize {
+        self.data.get_buffer_memory_size()
+    }
+
+    /// Returns the array memory size, in bytes, of this [ListArray]: the underlying array data's figure plus the size of the array struct itself (the values array is not added separately).
+    fn get_array_memory_size(&self) -> usize {
+        self.data.get_array_memory_size() + mem::size_of_val(self)
+    }
 }
 
 impl Array for LargeListArray {
@@ -1182,6 +1208,18 @@ impl Array for LargeListArray {
     fn data_ref(&self) -> &ArrayDataRef {
         &self.data
     }
+
+    /// Returns the buffer memory size, in bytes, of this [LargeListArray]: the underlying array data's figure plus that of the array returned by `values()`.
+    fn get_buffer_memory_size(&self) -> usize {
+        self.data.get_buffer_memory_size() + self.values().get_buffer_memory_size() // NOTE(review): ArrayData already sums its child_data; if values() is built from that child data this counts it twice (ListArray does not add it) — confirm
+    }
+
+    /// Returns the array memory size, in bytes, of this [LargeListArray]: the underlying array data's figure, that of the array returned by `values()`, and the size of the array struct itself.
+    fn get_array_memory_size(&self) -> usize {
+        self.data.get_array_memory_size()
+            + self.values().get_array_memory_size() // NOTE(review): ArrayData already sums its child_data; if values() is built from that child data this counts it twice — confirm
+            + mem::size_of_val(self)
+    }
 }
 
 // Helper function for printing potentially long arrays.
@@ -1332,6 +1370,18 @@ impl Array for FixedSizeListArray {
     fn data_ref(&self) -> &ArrayDataRef {
         &self.data
     }
+
+    /// Returns the buffer memory size, in bytes, of this [FixedSizeListArray]: the underlying array data's figure plus that of the array returned by `values()`.
+    fn get_buffer_memory_size(&self) -> usize {
+        self.data.get_buffer_memory_size() + self.values().get_buffer_memory_size() // NOTE(review): ArrayData already sums its child_data; if values() is built from that child data this counts it twice (ListArray does not add it) — confirm
+    }
+
+    /// Returns the array memory size, in bytes, of this [FixedSizeListArray]: the underlying array data's figure, that of the array returned by `values()`, and the size of the array struct itself.
+    fn get_array_memory_size(&self) -> usize {
+        self.data.get_array_memory_size()
+            + self.values().get_array_memory_size() // NOTE(review): ArrayData already sums its child_data; if values() is built from that child data this counts it twice — confirm
+            + mem::size_of_val(self)
+    }
 }
 
 impl fmt::Debug for FixedSizeListArray {
@@ -1398,6 +1448,16 @@ macro_rules! make_binary_type {
             fn data_ref(&self) -> &ArrayDataRef {
                 &self.data
             }
+
+            /// Returns the buffer memory size, in bytes, of this array, as reported by its underlying array data.
+            fn get_buffer_memory_size(&self) -> usize {
+                self.data.get_buffer_memory_size()
+            }
+
+            /// Returns the array memory size, in bytes, of this array: the underlying array data's figure plus the size of the array struct itself.
+            fn get_array_memory_size(&self) -> usize {
+                self.data.get_array_memory_size() + mem::size_of_val(self)
+            }
         }
     };
 }
@@ -1953,6 +2013,16 @@ impl Array for FixedSizeBinaryArray {
     fn data_ref(&self) -> &ArrayDataRef {
         &self.data
     }
+
+    /// Returns the buffer memory size, in bytes, of this [FixedSizeBinaryArray], as reported by its underlying array data.
+    fn get_buffer_memory_size(&self) -> usize {
+        self.data.get_buffer_memory_size()
+    }
+
+    /// Returns the array memory size, in bytes, of this [FixedSizeBinaryArray]: the underlying array data's figure plus the size of the array struct itself.
+    fn get_array_memory_size(&self) -> usize {
+        self.data.get_array_memory_size() + mem::size_of_val(self)
+    }
 }
 
 /// A nested array type where each child (called *field*) is represented by a separate
@@ -2035,6 +2105,16 @@ impl Array for StructArray {
     fn len(&self) -> usize {
         self.data().len()
     }
+
+    /// Returns the buffer memory size, in bytes, of this [StructArray], as reported by its underlying array data.
+    fn get_buffer_memory_size(&self) -> usize {
+        self.data.get_buffer_memory_size()
+    }
+
+    /// Returns the array memory size, in bytes, of this [StructArray]: the underlying array data's figure plus the size of the array struct itself.
+    fn get_array_memory_size(&self) -> usize {
+        self.data.get_array_memory_size() + mem::size_of_val(self)
+    }
 }
 
 impl From<Vec<(Field, ArrayRef)>> for StructArray {
@@ -2333,6 +2413,18 @@ impl<T: ArrowPrimitiveType> Array for DictionaryArray<T> {
     fn data_ref(&self) -> &ArrayDataRef {
         &self.data
     }
+
+    /// Returns the buffer memory size, in bytes, of this [DictionaryArray]: the underlying array data's figure plus that of the array returned by `values()`.
+    fn get_buffer_memory_size(&self) -> usize {
+        self.data.get_buffer_memory_size() + self.values().get_buffer_memory_size() // NOTE(review): ArrayData already sums its child_data; if values() is built from that child data this counts it twice — confirm
+    }
+
+    /// Returns the array memory size, in bytes, of this [DictionaryArray]: the underlying array data's figure, that of the array returned by `values()`, and the size of the array struct itself.
+    fn get_array_memory_size(&self) -> usize {
+        self.data.get_array_memory_size()
+            + self.values().get_array_memory_size() // NOTE(review): ArrayData already sums its child_data; if values() is built from that child data this counts it twice — confirm
+            + mem::size_of_val(self)
+    }
 }
 
 impl<T: ArrowPrimitiveType> fmt::Debug for DictionaryArray<T> {
@@ -2379,6 +2471,13 @@ mod tests {
             assert!(arr.is_valid(i));
             assert_eq!(i as i32, arr.value(i));
         }
+
+        assert_eq!(64, arr.get_buffer_memory_size());
+        let internals_of_primitive_array = 8 + 72; // RawPtrBox & Arc<ArrayData> combined.
+        assert_eq!(
+            arr.get_buffer_memory_size() + internals_of_primitive_array,
+            arr.get_array_memory_size()
+        );
     }
 
     #[test]
@@ -2398,6 +2497,13 @@ mod tests {
                 assert!(!arr.is_valid(i));
             }
         }
+
+        assert_eq!(128, arr.get_buffer_memory_size());
+        let internals_of_primitive_array = 8 + 72 + 16; // RawPtrBox & Arc<ArrayData> and it's null_bitmap combined.
+        assert_eq!(
+            arr.get_buffer_memory_size() + internals_of_primitive_array,
+            arr.get_array_memory_size()
+        );
     }
 
     #[test]
diff --git a/rust/arrow/src/array/data.rs b/rust/arrow/src/array/data.rs
index f8bf5cf..5f6e5dc 100644
--- a/rust/arrow/src/array/data.rs
+++ b/rust/arrow/src/array/data.rs
@@ -18,6 +18,7 @@
 //! Contains `ArrayData`, a generic representation of Arrow array data which encapsulates
 //! common attributes and operations for Arrow array.
 
+use std::mem;
 use std::sync::Arc;
 
 use crate::bitmap::Bitmap;
@@ -159,6 +160,45 @@ impl ArrayData {
     pub fn null_count(&self) -> usize {
         self.null_count
     }
+
+    /// Returns the summed capacity of this [ArrayData]'s buffers, plus its null bitmap's buffer size (if any), plus, recursively, the same figure for all child data.
+    pub fn get_buffer_memory_size(&self) -> usize {
+        let mut size = 0;
+        for buffer in &self.buffers {
+            size += buffer.capacity();
+        }
+        if let Some(bitmap) = &self.null_bitmap {
+            size += bitmap.get_buffer_memory_size()
+        }
+        for child in &self.child_data {
+            size += child.get_buffer_memory_size(); // recursion: nested children are included
+        }
+        size
+    }
+
+    /// Returns the number of bytes physically occupied by this [ArrayData]: the struct's own fields plus its buffers, null bitmap (if any) and, recursively, all child data.
+    pub fn get_array_memory_size(&self) -> usize {
+        let mut size = 0;
+        // Start from the struct's own size, minus the three fields that are accounted for individually below.
+        size += mem::size_of_val(self)
+            - mem::size_of_val(&self.buffers)
+            - mem::size_of_val(&self.null_bitmap)
+            - mem::size_of_val(&self.child_data);
+
+        // Add the fields that hold the actual data, walking from the buffers down to the children.
+        for buffer in &self.buffers {
+            size += mem::size_of_val(&buffer); // NOTE(review): `buffer` is already a reference here, so this measures the reference (pointer-sized), not the buffer struct — confirm intent
+            size += buffer.capacity();
+        }
+        if let Some(bitmap) = &self.null_bitmap {
+            size += bitmap.get_array_memory_size()
+        }
+        for child in &self.child_data {
+            size += child.get_array_memory_size(); // recursion: nested children are included
+        }
+
+        size
+    }
 }
 
 /// Builder for `ArrayData` type
diff --git a/rust/arrow/src/array/null.rs b/rust/arrow/src/array/null.rs
index 14e1dd7..867bd7c 100644
--- a/rust/arrow/src/array/null.rs
+++ b/rust/arrow/src/array/null.rs
@@ -36,6 +36,7 @@
 
 use std::any::Any;
 use std::fmt;
+use std::mem;
 
 use crate::array::{Array, ArrayData, ArrayDataRef};
 use crate::datatypes::*;
@@ -83,6 +84,16 @@ impl Array for NullArray {
     fn null_count(&self) -> usize {
         self.data().len()
     }
+
+    /// Returns the buffer memory size, in bytes, of this [NullArray], as reported by its underlying array data.
+    fn get_buffer_memory_size(&self) -> usize {
+        self.data.get_buffer_memory_size()
+    }
+
+    /// Returns the array memory size, in bytes, of this [NullArray]: the underlying array data's figure plus the size of the array struct itself.
+    fn get_array_memory_size(&self) -> usize {
+        self.data.get_array_memory_size() + mem::size_of_val(self)
+    }
 }
 
 impl From<ArrayDataRef> for NullArray {
@@ -112,11 +123,18 @@ mod tests {
 
     #[test]
     fn test_null_array() {
-        let array1 = NullArray::new(32);
+        let null_arr = NullArray::new(32);
+
+        assert_eq!(null_arr.len(), 32);
+        assert_eq!(null_arr.null_count(), 32);
+        assert_eq!(null_arr.is_valid(0), false);
 
-        assert_eq!(array1.len(), 32);
-        assert_eq!(array1.null_count(), 32);
-        assert_eq!(array1.is_valid(0), false);
+        assert_eq!(0, null_arr.get_buffer_memory_size());
+        let internals_of_null_array = 64; // Arc<ArrayData>
+        assert_eq!(
+            null_arr.get_buffer_memory_size() + internals_of_null_array,
+            null_arr.get_array_memory_size()
+        );
     }
 
     #[test]
diff --git a/rust/arrow/src/array/union.rs b/rust/arrow/src/array/union.rs
index 9e8f450..9bbf64e 100644
--- a/rust/arrow/src/array/union.rs
+++ b/rust/arrow/src/array/union.rs
@@ -86,6 +86,7 @@ use crate::util::bit_util;
 use core::fmt;
 use std::any::Any;
 use std::collections::HashMap;
+use std::mem;
 use std::mem::size_of;
 
 /// An Array that can represent slots of varying types
@@ -296,6 +297,25 @@ impl Array for UnionArray {
     fn data_ref(&self) -> &ArrayDataRef {
         &self.data
     }
+
+    /// Returns the buffer memory size, in bytes, of this [UnionArray]: the underlying array data's figure plus that of every array in `boxed_fields`.
+    fn get_buffer_memory_size(&self) -> usize {
+        let mut size = self.data.get_buffer_memory_size();
+        for field in &self.boxed_fields { // NOTE(review): if boxed_fields mirror self.data's child_data, these bytes are already in the sum above — confirm
+            size += field.get_buffer_memory_size();
+        }
+        size
+    }
+
+    /// Returns the array memory size, in bytes, of this [UnionArray]: the underlying array data's figure, the struct's own size excluding `boxed_fields`, and the figure for every array in `boxed_fields`.
+    fn get_array_memory_size(&self) -> usize {
+        let mut size = self.data.get_array_memory_size();
+        size += mem::size_of_val(self) - mem::size_of_val(&self.boxed_fields); // boxed_fields is counted element by element below
+        for field in &self.boxed_fields { // NOTE(review): if boxed_fields mirror self.data's child_data, these bytes are already in the sum above — confirm
+            size += field.get_array_memory_size();
+        }
+        size
+    }
 }
 
 impl fmt::Debug for UnionArray {
@@ -675,6 +695,16 @@ mod tests {
             let value = slot.value(0);
             assert_eq!(expected_value, &value);
         }
+
+        assert_eq!(
+            4 * 8 * 4 * mem::size_of::<i32>(),
+            union.get_buffer_memory_size()
+        );
+        let internals_of_union_array = (8 + 72) + (union.boxed_fields.len() * 144); // Arc<ArrayData> & Vec<ArrayRef> combined.
+        assert_eq!(
+            union.get_buffer_memory_size() + internals_of_union_array,
+            union.get_array_memory_size()
+        );
     }
 
     #[test]
diff --git a/rust/arrow/src/bitmap.rs b/rust/arrow/src/bitmap.rs
index 06412af..e9060e6 100644
--- a/rust/arrow/src/bitmap.rs
+++ b/rust/arrow/src/bitmap.rs
@@ -21,6 +21,7 @@
 use crate::buffer::Buffer;
 use crate::error::Result;
 use crate::util::bit_util;
+use std::mem;
 
 use std::ops::{BitAnd, BitOr};
 
@@ -67,6 +68,16 @@ impl Bitmap {
     pub fn into_buffer(self) -> Buffer {
         self.bits
     }
+
+    /// Returns the capacity of the buffer backing this [Bitmap], reported as its buffer memory size in bytes.
+    pub fn get_buffer_memory_size(&self) -> usize {
+        self.bits.capacity()
+    }
+
+    /// Returns the memory physically occupied by this [Bitmap], in bytes: the capacity of its backing buffer plus the size of the bitmap struct itself.
+    pub fn get_array_memory_size(&self) -> usize {
+        self.bits.capacity() + mem::size_of_val(self)
+    }
 }
 
 impl<'a, 'b> BitAnd<&'b Bitmap> for &'a Bitmap {