You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/23 13:15:46 UTC
[arrow-rs] branch master updated: Add Row size methods (#3160) (#3163)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 12a67b9bc Add Row size methods (#3160) (#3163)
12a67b9bc is described below
commit 12a67b9bc7e1538f5af1a189cc0a78c14d551897
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Wed Nov 23 13:15:39 2022 +0000
Add Row size methods (#3160) (#3163)
* Add Row size methods (#3160)
* Fix copypasta
* Fix
---
arrow/src/row/interner.rs | 21 +++++++++++++++++++++
arrow/src/row/mod.rs | 33 +++++++++++++++++++++++++++++++++
2 files changed, 54 insertions(+)
diff --git a/arrow/src/row/interner.rs b/arrow/src/row/interner.rs
index e6c8f0972..1c71b6a55 100644
--- a/arrow/src/row/interner.rs
+++ b/arrow/src/row/interner.rs
@@ -157,6 +157,15 @@ impl OrderPreservingInterner {
pub fn value(&self, key: Interned) -> &[u8] {
self.values.index(key)
}
+
+ /// Returns the size of this instance in bytes including self
+ pub fn size(&self) -> usize {
+ std::mem::size_of::<Self>()
+ + self.keys.buffer_size()
+ + self.values.buffer_size()
+ + self.bucket.size()
+ + self.lookup.capacity() * std::mem::size_of::<Interned>()
+ }
}
/// A buffer of `[u8]` indexed by `[Interned]`
@@ -192,6 +201,11 @@ impl InternBuffer {
self.offsets.push(self.values.len());
key
}
+
+ /// Returns the byte size of the associated buffers
+ fn buffer_size(&self) -> usize {
+ self.values.capacity() + self.offsets.capacity() * std::mem::size_of::<usize>()
+ }
}
impl Index<Interned> for InternBuffer {
@@ -324,6 +338,13 @@ impl Bucket {
}
}
}
+
+ /// Returns the size of this instance in bytes
+ fn size(&self) -> usize {
+ std::mem::size_of::<Self>()
+ + self.slots.capacity() * std::mem::size_of::<Slot>()
+ + self.next.as_ref().map(|x| x.size()).unwrap_or_default()
+ }
}
#[cfg(test)]
diff --git a/arrow/src/row/mod.rs b/arrow/src/row/mod.rs
index 1d0a58d95..c57fd41eb 100644
--- a/arrow/src/row/mod.rs
+++ b/arrow/src/row/mod.rs
@@ -358,6 +358,14 @@ impl SortField {
pub fn new_with_options(data_type: DataType, options: SortOptions) -> Self {
Self { options, data_type }
}
+
+ /// Return size of this instance in bytes.
+ ///
+ /// Includes the size of `Self`.
+ pub fn size(&self) -> usize {
+ self.data_type.size() + std::mem::size_of::<Self>()
+ - std::mem::size_of::<DataType>()
+ }
}
impl RowConverter {
@@ -480,6 +488,21 @@ impl RowConverter {
})
.collect()
}
+
+ /// Returns the size of this instance in bytes
+ ///
+ /// Includes the size of `Self`.
+ pub fn size(&self) -> usize {
+ std::mem::size_of::<Self>()
+ + self.fields.iter().map(|x| x.size()).sum::<usize>()
+ + self.interners.capacity()
+ * std::mem::size_of::<Option<Box<OrderPreservingInterner>>>()
+ + self
+ .interners
+ .iter()
+ .filter_map(|x| x.as_ref().map(|x| x.size()))
+ .sum::<usize>()
+ }
}
/// A row-oriented representation of arrow data, that is normalized for comparison.
@@ -512,6 +535,16 @@ impl Rows {
pub fn iter(&self) -> RowsIter<'_> {
self.into_iter()
}
+
+ /// Returns the size of this instance in bytes
+ ///
+ /// Includes the size of `Self`.
+ pub fn size(&self) -> usize {
+ // Size of fields is accounted for as part of RowConverter
+ std::mem::size_of::<Self>()
+ + self.buffer.len()
+ + self.offsets.len() * std::mem::size_of::<usize>()
+ }
}
impl<'a> IntoIterator for &'a Rows {