You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/04/14 21:22:22 UTC

[arrow-rs] branch master updated: Include byte offsets in parquet-layout (#4086)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 3ce9b9777 Include byte offsets in parquet-layout (#4086)
3ce9b9777 is described below

commit 3ce9b9777f5b048586c397148e7c057ab79cb032
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Fri Apr 14 22:22:15 2023 +0100

    Include byte offsets in parquet-layout (#4086)
---
 parquet/src/bin/parquet-layout.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/parquet/src/bin/parquet-layout.rs b/parquet/src/bin/parquet-layout.rs
index 7278c718c..5f71551e1 100644
--- a/parquet/src/bin/parquet-layout.rs
+++ b/parquet/src/bin/parquet-layout.rs
@@ -70,6 +70,7 @@ struct Page {
     compression: Option<&'static str>,
     encoding: &'static str,
     page_type: &'static str,
+    offset: u64,
     compressed_bytes: i32,
     uncompressed_bytes: i32,
     header_bytes: i32,
@@ -104,6 +105,7 @@ fn do_layout<C: ChunkReader>(reader: &C) -> Result<ParquetFile> {
                                 compression,
                                 encoding: encoding(dictionary.encoding),
                                 page_type: "dictionary",
+                                offset: start,
                                 compressed_bytes: header.compressed_page_size,
                                 uncompressed_bytes: header.uncompressed_page_size,
                                 header_bytes: header_len as _,
@@ -114,6 +116,7 @@ fn do_layout<C: ChunkReader>(reader: &C) -> Result<ParquetFile> {
                                 compression,
                                 encoding: encoding(data_page.encoding),
                                 page_type: "data_page_v1",
+                                offset: start,
                                 compressed_bytes: header.compressed_page_size,
                                 uncompressed_bytes: header.uncompressed_page_size,
                                 header_bytes: header_len as _,
@@ -126,6 +129,7 @@ fn do_layout<C: ChunkReader>(reader: &C) -> Result<ParquetFile> {
                                 compression: compression.filter(|_| is_compressed),
                                 encoding: encoding(data_page.encoding),
                                 page_type: "data_page_v2",
+                                offset: start,
                                 compressed_bytes: header.compressed_page_size,
                                 uncompressed_bytes: header.uncompressed_page_size,
                                 header_bytes: header_len as _,