You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/23 13:15:46 UTC

[arrow-rs] branch master updated: Add Row size methods (#3160) (#3163)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 12a67b9bc Add Row size methods (#3160) (#3163)
12a67b9bc is described below

commit 12a67b9bc7e1538f5af1a189cc0a78c14d551897
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Wed Nov 23 13:15:39 2022 +0000

    Add Row size methods (#3160) (#3163)
    
    * Add Row size methods (#3160)
    
    * Fix copypasta
    
    * Fix
---
 arrow/src/row/interner.rs | 21 +++++++++++++++++++++
 arrow/src/row/mod.rs      | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/arrow/src/row/interner.rs b/arrow/src/row/interner.rs
index e6c8f0972..1c71b6a55 100644
--- a/arrow/src/row/interner.rs
+++ b/arrow/src/row/interner.rs
@@ -157,6 +157,15 @@ impl OrderPreservingInterner {
     pub fn value(&self, key: Interned) -> &[u8] {
         self.values.index(key)
     }
+
+    /// Returns the total size in bytes: `Self`, the `keys`/`values` buffers, `bucket` and the `lookup` capacity
+    pub fn size(&self) -> usize {
+        std::mem::size_of::<Self>()
+            + self.keys.buffer_size()
+            + self.values.buffer_size()
+            + self.bucket.size()
+            + self.lookup.capacity() * std::mem::size_of::<Interned>()
+    }
 }
 
 /// A buffer of `[u8]` indexed by `[Interned]`
@@ -192,6 +201,11 @@ impl InternBuffer {
         self.offsets.push(self.values.len());
         key
     }
+
+    /// Returns the byte size of the `values` and `offsets` buffers, measured by capacity; excludes `Self`
+    fn buffer_size(&self) -> usize {
+        self.values.capacity() + self.offsets.capacity() * std::mem::size_of::<usize>()
+    }
 }
 
 impl Index<Interned> for InternBuffer {
@@ -324,6 +338,13 @@ impl Bucket {
             }
         }
     }
+
+    /// Returns the size in bytes of this bucket, its `slots` allocation and, recursively, `next`
+    fn size(&self) -> usize {
+        std::mem::size_of::<Self>()
+            + self.slots.capacity() * std::mem::size_of::<Slot>() // NOTE(review): shallow; confirm `Slot` owns no heap data
+            + self.next.as_ref().map(|x| x.size()).unwrap_or_default()
+    }
 }
 
 #[cfg(test)]
diff --git a/arrow/src/row/mod.rs b/arrow/src/row/mod.rs
index 1d0a58d95..c57fd41eb 100644
--- a/arrow/src/row/mod.rs
+++ b/arrow/src/row/mod.rs
@@ -358,6 +358,14 @@ impl SortField {
     pub fn new_with_options(data_type: DataType, options: SortOptions) -> Self {
         Self { options, data_type }
     }
+
+    /// Returns the size of this instance in bytes.
+    ///
+    /// Includes `Self`; `size_of::<DataType>()` is subtracted, presumably as `data_type.size()` already counts it.
+    pub fn size(&self) -> usize {
+        self.data_type.size() + std::mem::size_of::<Self>()
+            - std::mem::size_of::<DataType>()
+    }
 }
 
 impl RowConverter {
@@ -480,6 +488,21 @@ impl RowConverter {
             })
             .collect()
     }
+
+    /// Returns the size of this instance in bytes.
+    ///
+    /// Includes `Self`, the size of every field, the `interners` allocation and each interner that is present.
+    pub fn size(&self) -> usize {
+        std::mem::size_of::<Self>()
+            + self.fields.iter().map(|x| x.size()).sum::<usize>()
+            + self.interners.capacity()
+                * std::mem::size_of::<Option<Box<OrderPreservingInterner>>>()
+            + self
+                .interners
+                .iter()
+                .filter_map(|x| x.as_ref().map(|x| x.size()))
+                .sum::<usize>()
+    }
 }
 
 /// A row-oriented representation of arrow data, that is normalized for comparison.
@@ -512,6 +535,16 @@ impl Rows {
     pub fn iter(&self) -> RowsIter<'_> {
         self.into_iter()
     }
+
+    /// Returns the size of this instance in bytes.
+    ///
+    /// Includes `Self` plus the `buffer` and `offsets` contents, measured by `len()`; fields are excluded.
+    pub fn size(&self) -> usize {
+        // Size of fields is accounted for as part of RowConverter
+        std::mem::size_of::<Self>()
+            + self.buffer.len()
+            + self.offsets.len() * std::mem::size_of::<usize>()
+    }
 }
 
 impl<'a> IntoIterator for &'a Rows {