You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/04/14 21:22:22 UTC
[arrow-rs] branch master updated: Include byte offsets in parquet-layout (#4086)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 3ce9b9777 Include byte offsets in parquet-layout (#4086)
3ce9b9777 is described below
commit 3ce9b9777f5b048586c397148e7c057ab79cb032
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Fri Apr 14 22:22:15 2023 +0100
Include byte offsets in parquet-layout (#4086)
---
parquet/src/bin/parquet-layout.rs | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/parquet/src/bin/parquet-layout.rs b/parquet/src/bin/parquet-layout.rs
index 7278c718c..5f71551e1 100644
--- a/parquet/src/bin/parquet-layout.rs
+++ b/parquet/src/bin/parquet-layout.rs
@@ -70,6 +70,7 @@ struct Page {
compression: Option<&'static str>,
encoding: &'static str,
page_type: &'static str,
+ offset: u64,
compressed_bytes: i32,
uncompressed_bytes: i32,
header_bytes: i32,
@@ -104,6 +105,7 @@ fn do_layout<C: ChunkReader>(reader: &C) -> Result<ParquetFile> {
compression,
encoding: encoding(dictionary.encoding),
page_type: "dictionary",
+ offset: start,
compressed_bytes: header.compressed_page_size,
uncompressed_bytes: header.uncompressed_page_size,
header_bytes: header_len as _,
@@ -114,6 +116,7 @@ fn do_layout<C: ChunkReader>(reader: &C) -> Result<ParquetFile> {
compression,
encoding: encoding(data_page.encoding),
page_type: "data_page_v1",
+ offset: start,
compressed_bytes: header.compressed_page_size,
uncompressed_bytes: header.uncompressed_page_size,
header_bytes: header_len as _,
@@ -126,6 +129,7 @@ fn do_layout<C: ChunkReader>(reader: &C) -> Result<ParquetFile> {
compression: compression.filter(|_| is_compressed),
encoding: encoding(data_page.encoding),
page_type: "data_page_v2",
+ offset: start,
compressed_bytes: header.compressed_page_size,
uncompressed_bytes: header.uncompressed_page_size,
header_bytes: header_len as _,