You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/10/17 16:53:12 UTC

[GitHub] [arrow] nevi-me commented on a change in pull request #8300: ARROW-10135: [Rust] [Parquet] Refactor file module to help adding sources

nevi-me commented on a change in pull request #8300:
URL: https://github.com/apache/arrow/pull/8300#discussion_r506961922



##########
File path: rust/parquet/src/file/reader.rs
##########
@@ -18,35 +18,37 @@
 //! Contains file reader API and provides methods to access file metadata, row group
 //! readers to read individual column chunks, or access record iterator.
 
-use std::{
-    convert::TryFrom,
-    fs::File,
-    io::{BufReader, Cursor, Read, Seek, SeekFrom},
-    path::Path,
-    rc::Rc,
-};
+use std::{boxed::Box, io::Read, rc::Rc};
 
-use byteorder::{ByteOrder, LittleEndian};
-use parquet_format::{
-    ColumnOrder as TColumnOrder, FileMetaData as TFileMetaData, PageHeader, PageType,
-};
-use thrift::protocol::TCompactInputProtocol;
-
-use crate::basic::{ColumnOrder, Compression, Encoding, Type};
 use crate::column::page::PageIterator;
-use crate::column::{
-    page::{Page, PageReader},
-    reader::{ColumnReader, ColumnReaderImpl},
-};
-use crate::compression::{create_codec, Codec};
+use crate::column::{page::PageReader, reader::ColumnReader};
 use crate::errors::{ParquetError, Result};
-use crate::file::{metadata::*, statistics, FOOTER_SIZE, PARQUET_MAGIC};
+use crate::file::metadata::*;
+pub use crate::file::serialized_reader::{SerializedFileReader, SerializedPageReader};
 use crate::record::reader::RowIter;
-use crate::record::Row;
-use crate::schema::types::{
-    self, ColumnDescPtr, SchemaDescPtr, SchemaDescriptor, Type as SchemaType,
-};
-use crate::util::{io::FileSource, memory::ByteBufferPtr};
+use crate::schema::types::{ColumnDescPtr, SchemaDescPtr, Type as SchemaType};
+
+use crate::basic::Type;
+
+use crate::column::reader::ColumnReaderImpl;
+
+/// Length should return the number of bytes that the implementor contains.
+/// It's mainly used to read the metadata, which is at the end of the source.
+#[allow(clippy::len_without_is_empty)]
+pub trait Length {
+    /// Returns the number of bytes of the inner source.
+    fn len(&self) -> u64;
+}
+
+/// The ChunkReader trait generates readers of chunks of a source.
+/// For a file system reader, each chunk might contain a clone of File bounded on a given range.
+/// For an object store reader, each read can be mapped to a range request.
+pub trait ChunkReader: Length {
+    type T: Read;
+    /// Get a serially readable slice of the current reader.
+    /// This should fail if the slice exceeds the current bounds.
+    fn get_read(&self, start: u64, length: usize) -> Result<Self::T>;
+}
 
 // ----------------------------------------------------------------------

Review comment:
       @alamb is this still an issue?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org