You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/06/28 06:34:58 UTC

[arrow-rs] branch master updated: fix bug: write column metadata after the column chunk data (#1947)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 464e8d199 fix bug: write column metadata after the column chunk data (#1947)
464e8d199 is described below

commit 464e8d199d0dd875b09241cc0608da73d6771e29
Author: Kun Liu <li...@apache.org>
AuthorDate: Tue Jun 28 14:34:53 2022 +0800

    fix bug: write column metadata after the column chunk data (#1947)
---
 parquet/src/file/metadata.rs | 31 ++++++++++++++++++-------------
 parquet/src/file/writer.rs   | 17 ++++++-----------
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/parquet/src/file/metadata.rs b/parquet/src/file/metadata.rs
index a3477dd75..4d9842c0e 100644
--- a/parquet/src/file/metadata.rs
+++ b/parquet/src/file/metadata.rs
@@ -579,7 +579,24 @@ impl ColumnChunkMetaData {
 
     /// Method to convert to Thrift.
     pub fn to_thrift(&self) -> ColumnChunk {
-        let column_metadata = ColumnMetaData {
+        let column_metadata = self.to_column_metadata_thrift();
+
+        ColumnChunk {
+            file_path: self.file_path().map(|s| s.to_owned()),
+            file_offset: self.file_offset,
+            meta_data: Some(column_metadata),
+            offset_index_offset: self.offset_index_offset,
+            offset_index_length: self.offset_index_length,
+            column_index_offset: self.column_index_offset,
+            column_index_length: self.column_index_length,
+            crypto_metadata: None,
+            encrypted_column_metadata: None,
+        }
+    }
+
+    /// Method to convert to Thrift `ColumnMetaData`
+    pub fn to_column_metadata_thrift(&self) -> ColumnMetaData {
+        ColumnMetaData {
             type_: self.column_type.into(),
             encodings: self.encodings().iter().map(|&v| v.into()).collect(),
             path_in_schema: Vec::from(self.column_path.as_ref()),
@@ -597,18 +614,6 @@ impl ColumnChunkMetaData {
                 .as_ref()
                 .map(|vec| vec.iter().map(page_encoding_stats::to_thrift).collect()),
             bloom_filter_offset: self.bloom_filter_offset,
-        };
-
-        ColumnChunk {
-            file_path: self.file_path().map(|s| s.to_owned()),
-            file_offset: self.file_offset,
-            meta_data: Some(column_metadata),
-            offset_index_offset: self.offset_index_offset,
-            offset_index_length: self.offset_index_length,
-            column_index_offset: self.column_index_offset,
-            column_index_length: self.column_index_length,
-            crypto_metadata: None,
-            encrypted_column_metadata: None,
         }
     }
 }
diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index 0a8fc331e..b503c264d 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -434,16 +434,6 @@ impl<'a, W: Write> SerializedPageWriter<'a, W> {
         }
         Ok(self.sink.bytes_written() - start_pos)
     }
-
-    /// Serializes column chunk into Thrift.
-    /// Returns Ok() if there are not errors serializing and writing data into the sink.
-    #[inline]
-    fn serialize_column_chunk(&mut self, chunk: parquet::ColumnChunk) -> Result<()> {
-        let mut protocol = TCompactOutputProtocol::new(&mut self.sink);
-        chunk.write_to_out_protocol(&mut protocol)?;
-        protocol.flush()?;
-        Ok(())
-    }
 }
 
 impl<'a, W: Write> PageWriter for SerializedPageWriter<'a, W> {
@@ -533,7 +523,12 @@ impl<'a, W: Write> PageWriter for SerializedPageWriter<'a, W> {
     }
 
     fn write_metadata(&mut self, metadata: &ColumnChunkMetaData) -> Result<()> {
-        self.serialize_column_chunk(metadata.to_thrift())
+        let mut protocol = TCompactOutputProtocol::new(&mut self.sink);
+        metadata
+            .to_column_metadata_thrift()
+            .write_to_out_protocol(&mut protocol)?;
+        protocol.flush()?;
+        Ok(())
     }
 
     fn close(&mut self) -> Result<()> {