You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/05/02 17:07:37 UTC

[arrow-rs] branch master updated: expose row-group flush in public api (#1634)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 20cc4aa7e expose row-group flush in public api (#1634)
20cc4aa7e is described below

commit 20cc4aa7ee823b7c3dbddfb0823b6a48ca5f0f14
Author: Kamil Konior <57...@users.noreply.github.com>
AuthorDate: Mon May 2 19:07:31 2022 +0200

    expose row-group flush in public api (#1634)
    
    * expose row-group flush in public api
    
    * a try to improve method names
    
    * tiny refactors, beautifying code
---
 parquet/src/arrow/arrow_writer.rs | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/parquet/src/arrow/arrow_writer.rs b/parquet/src/arrow/arrow_writer.rs
index 3a53c2cf3..7ddd64432 100644
--- a/parquet/src/arrow/arrow_writer.rs
+++ b/parquet/src/arrow/arrow_writer.rs
@@ -113,7 +113,6 @@ impl<W: 'static + ParquetWriter> ArrowWriter<W> {
         }
 
         self.buffered_rows += batch.num_rows();
-
         self.flush_completed()?;
 
         Ok(())
@@ -122,13 +121,18 @@ impl<W: 'static + ParquetWriter> ArrowWriter<W> {
     /// Flushes buffered data until there are less than `max_row_group_size` rows buffered
     fn flush_completed(&mut self) -> Result<()> {
         while self.buffered_rows >= self.max_row_group_size {
-            self.flush_row_group(self.max_row_group_size)?;
+            self.flush_rows(self.max_row_group_size)?;
         }
         Ok(())
     }
 
+    /// Flushes all buffered rows into a new row group
+    pub fn flush(&mut self) -> Result<()> {
+        self.flush_rows(self.buffered_rows)
+    }
+
     /// Flushes `num_rows` from the buffer into a new row group
-    fn flush_row_group(&mut self, num_rows: usize) -> Result<()> {
+    fn flush_rows(&mut self, num_rows: usize) -> Result<()> {
         if num_rows == 0 {
             return Ok(());
         }
@@ -192,8 +196,7 @@ impl<W: 'static + ParquetWriter> ArrowWriter<W> {
 
     /// Close and finalize the underlying Parquet writer
     pub fn close(&mut self) -> Result<parquet_format::FileMetaData> {
-        self.flush_completed()?;
-        self.flush_row_group(self.buffered_rows)?;
+        self.flush()?;
         self.writer.close()
     }
 }