You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/10/29 03:25:00 UTC

[arrow-rs] branch master updated: Move `byte_size` from datafusion::physical_expr (#2965)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 04e224cd3 Move `byte_size` from datafusion::physical_expr (#2965)
04e224cd3 is described below

commit 04e224cd3762539b078bc28cdad2f73767c328ff
Author: Brent Gardner <br...@spaceandtime.io>
AuthorDate: Fri Oct 28 21:24:55 2022 -0600

    Move `byte_size` from datafusion::physical_expr (#2965)
    
    * Move `byte_size` from datafusion::physical_expr
    
    * lint
    
    * PR feedback
---
 arrow-array/src/record_batch.rs | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs
index 58462449e..e613a38bb 100644
--- a/arrow-array/src/record_batch.rs
+++ b/arrow-array/src/record_batch.rs
@@ -377,6 +377,14 @@ impl RecordBatch {
         let schema = Arc::new(Schema::new(fields));
         RecordBatch::try_new(schema, columns)
     }
+
+    /// Returns the total number of bytes of memory physically occupied by this batch, i.e. the sum of `get_array_memory_size()` over every column.
+    pub fn get_array_memory_size(&self) -> usize {
+        self.columns()
+            .iter()
+            .map(|array| array.get_array_memory_size())
+            .sum()
+    }
 }
 
 /// Options that control the behaviour used when creating a [`RecordBatch`].
@@ -471,6 +479,22 @@ mod tests {
         check_batch(record_batch, 5)
     }
 
+    #[test]
+    fn byte_size_should_not_regress() {
+        let schema = Schema::new(vec![
+            Field::new("a", DataType::Int32, false),
+            Field::new("b", DataType::Utf8, false),
+        ]);
+
+        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
+        let b = StringArray::from(vec!["a", "b", "c", "d", "e"]);
+
+        let record_batch =
+            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])
+                .unwrap();
+        assert_eq!(record_batch.get_array_memory_size(), 592); // pinned regression value; NOTE(review): presumably reflects current buffer allocation sizes of both columns — confirm before changing
+    }
+
     fn check_batch(record_batch: RecordBatch, num_rows: usize) {
         assert_eq!(num_rows, record_batch.num_rows());
         assert_eq!(2, record_batch.num_columns());