You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2022/11/13 13:25:17 UTC

[arrow-rs] 02/02: add api

This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch add-bloom-filter-2
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit d3d407b293091bd71c04f865b0c7c896ac52d452
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Sun Nov 13 13:24:10 2022 +0000

    add api
---
 parquet/src/file/reader.rs            |  6 ++++++
 parquet/src/file/serialized_reader.rs | 15 +++++++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/parquet/src/file/reader.rs b/parquet/src/file/reader.rs
index 70ff37a41..325944c21 100644
--- a/parquet/src/file/reader.rs
+++ b/parquet/src/file/reader.rs
@@ -21,6 +21,8 @@
 use bytes::Bytes;
 use std::{boxed::Box, io::Read, sync::Arc};
 
+#[cfg(feature = "bloom")]
+use crate::bloom_filter::Sbbf;
 use crate::column::page::PageIterator;
 use crate::column::{page::PageReader, reader::ColumnReader};
 use crate::errors::{ParquetError, Result};
@@ -143,6 +145,10 @@ pub trait RowGroupReader: Send + Sync {
         Ok(col_reader)
     }
 
+    #[cfg(feature = "bloom")]
+    /// Get bloom filter for the `i`th column chunk, if present.
+    fn get_column_bloom_filter(&self, i: usize) -> Result<Option<Sbbf>>;
+
     /// Get iterator of `Row`s from this row group.
     ///
     /// Projected schema can be a subset of or equal to the file schema, when it is None,
diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs
index a400d4dab..8cefe1c5e 100644
--- a/parquet/src/file/serialized_reader.rs
+++ b/parquet/src/file/serialized_reader.rs
@@ -22,11 +22,9 @@ use std::collections::VecDeque;
 use std::io::Cursor;
 use std::{convert::TryFrom, fs::File, io::Read, path::Path, sync::Arc};
 
-use crate::format::{PageHeader, PageLocation, PageType};
-use bytes::{Buf, Bytes};
-use thrift::protocol::TCompactInputProtocol;
-
 use crate::basic::{Encoding, Type};
+#[cfg(feature = "bloom")]
+use crate::bloom_filter::Sbbf;
 use crate::column::page::{Page, PageMetadata, PageReader};
 use crate::compression::{create_codec, Codec};
 use crate::errors::{ParquetError, Result};
@@ -38,10 +36,13 @@ use crate::file::{
     reader::*,
     statistics,
 };
+use crate::format::{PageHeader, PageLocation, PageType};
 use crate::record::reader::RowIter;
 use crate::record::Row;
 use crate::schema::types::Type as SchemaType;
 use crate::util::{io::TryClone, memory::ByteBufferPtr};
+use bytes::{Buf, Bytes};
+use thrift::protocol::TCompactInputProtocol;
 // export `SliceableCursor` and `FileSource` publically so clients can
 // re-use the logic in their own ParquetFileWriter wrappers
 pub use crate::util::io::FileSource;
@@ -387,6 +388,12 @@ impl<'a, R: 'static + ChunkReader> RowGroupReader for SerializedRowGroupReader<'
         )?))
     }
 
+    #[cfg(feature = "bloom")]
+    /// Get bloom filter for the `i`th column chunk, if present (stub: currently `todo!()`).
+    fn get_column_bloom_filter(&self, i: usize) -> Result<Option<Sbbf>> {
+        todo!()
+    }
+
     fn get_row_iter(&self, projection: Option<SchemaType>) -> Result<RowIter> {
         RowIter::from_row_group(projection, self)
     }