You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/06/28 06:34:58 UTC
[arrow-rs] branch master updated: fix bug: write column metadata to the behind of the column chunk data (#1947)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 464e8d199 fix bug: write column metadata to the behind of the column chunk data (#1947)
464e8d199 is described below
commit 464e8d199d0dd875b09241cc0608da73d6771e29
Author: Kun Liu <li...@apache.org>
AuthorDate: Tue Jun 28 14:34:53 2022 +0800
fix bug: write column metadata to the behind of the column chunk data (#1947)
---
parquet/src/file/metadata.rs | 31 ++++++++++++++++++-------------
parquet/src/file/writer.rs | 17 ++++++-----------
2 files changed, 24 insertions(+), 24 deletions(-)
diff --git a/parquet/src/file/metadata.rs b/parquet/src/file/metadata.rs
index a3477dd75..4d9842c0e 100644
--- a/parquet/src/file/metadata.rs
+++ b/parquet/src/file/metadata.rs
@@ -579,7 +579,24 @@ impl ColumnChunkMetaData {
/// Method to convert to Thrift.
pub fn to_thrift(&self) -> ColumnChunk {
- let column_metadata = ColumnMetaData {
+ let column_metadata = self.to_column_metadata_thrift();
+
+ ColumnChunk {
+ file_path: self.file_path().map(|s| s.to_owned()),
+ file_offset: self.file_offset,
+ meta_data: Some(column_metadata),
+ offset_index_offset: self.offset_index_offset,
+ offset_index_length: self.offset_index_length,
+ column_index_offset: self.column_index_offset,
+ column_index_length: self.column_index_length,
+ crypto_metadata: None,
+ encrypted_column_metadata: None,
+ }
+ }
+
+ /// Method to convert to Thrift `ColumnMetaData`
+ pub fn to_column_metadata_thrift(&self) -> ColumnMetaData {
+ ColumnMetaData {
type_: self.column_type.into(),
encodings: self.encodings().iter().map(|&v| v.into()).collect(),
path_in_schema: Vec::from(self.column_path.as_ref()),
@@ -597,18 +614,6 @@ impl ColumnChunkMetaData {
.as_ref()
.map(|vec| vec.iter().map(page_encoding_stats::to_thrift).collect()),
bloom_filter_offset: self.bloom_filter_offset,
- };
-
- ColumnChunk {
- file_path: self.file_path().map(|s| s.to_owned()),
- file_offset: self.file_offset,
- meta_data: Some(column_metadata),
- offset_index_offset: self.offset_index_offset,
- offset_index_length: self.offset_index_length,
- column_index_offset: self.column_index_offset,
- column_index_length: self.column_index_length,
- crypto_metadata: None,
- encrypted_column_metadata: None,
}
}
}
diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index 0a8fc331e..b503c264d 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -434,16 +434,6 @@ impl<'a, W: Write> SerializedPageWriter<'a, W> {
}
Ok(self.sink.bytes_written() - start_pos)
}
-
- /// Serializes column chunk into Thrift.
- /// Returns Ok() if there are not errors serializing and writing data into the sink.
- #[inline]
- fn serialize_column_chunk(&mut self, chunk: parquet::ColumnChunk) -> Result<()> {
- let mut protocol = TCompactOutputProtocol::new(&mut self.sink);
- chunk.write_to_out_protocol(&mut protocol)?;
- protocol.flush()?;
- Ok(())
- }
}
impl<'a, W: Write> PageWriter for SerializedPageWriter<'a, W> {
@@ -533,7 +523,12 @@ impl<'a, W: Write> PageWriter for SerializedPageWriter<'a, W> {
}
fn write_metadata(&mut self, metadata: &ColumnChunkMetaData) -> Result<()> {
- self.serialize_column_chunk(metadata.to_thrift())
+ let mut protocol = TCompactOutputProtocol::new(&mut self.sink);
+ metadata
+ .to_column_metadata_thrift()
+ .write_to_out_protocol(&mut protocol)?;
+ protocol.flush()?;
+ Ok(())
}
fn close(&mut self) -> Result<()> {