You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2022/11/09 12:51:50 UTC
[arrow-rs] 02/02: create new function to facilitate fixture test
This is an automated email from the ASF dual-hosted git repository.
jiayuliu pushed a commit to branch bloom-filter-reader
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
commit 01aed8377b4d9555405860b1bad1c6c3dbbf84f3
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Wed Nov 9 20:51:31 2022 +0800
create new function to facilitate fixture test
---
parquet/src/bloom_filter/mod.rs | 40 +++++++++++++++++++++++++++++-----------
1 file changed, 29 insertions(+), 11 deletions(-)
diff --git a/parquet/src/bloom_filter/mod.rs b/parquet/src/bloom_filter/mod.rs
index 9166eadc3..e32a1d348 100644
--- a/parquet/src/bloom_filter/mod.rs
+++ b/parquet/src/bloom_filter/mod.rs
@@ -85,6 +85,20 @@ fn block_check(block: &Block, hash: u32) -> bool {
pub(crate) struct Sbbf(Vec<Block>);
impl Sbbf {
+ fn new(bitset: &[u8]) -> Self {
+ let data = bitset
+ .chunks_exact(4 * 8)
+ .map(|chunk| {
+ let mut block = [0_u32; 8];
+ for (i, word) in chunk.chunks_exact(4).enumerate() {
+ block[i] = u32::from_le_bytes(word.try_into().unwrap());
+ }
+ block
+ })
+ .collect::<Vec<Block>>();
+ Self(data)
+ }
+
pub fn read_from_column_chunk<R: Read + Seek>(
column_metadata: &ColumnChunkMetaData,
mut reader: &mut R,
@@ -119,17 +133,7 @@ impl Sbbf {
reader.read_exact(&mut buffer).map_err(|e| {
ParquetError::General(format!("Could not read bloom filter: {}", e))
})?;
- let data = buffer
- .chunks_exact(4 * 8)
- .map(|chunk| {
- let mut block = [0_u32; 8];
- for (i, word) in chunk.chunks_exact(4).enumerate() {
- block[i] = u32::from_le_bytes(word.try_into().unwrap());
- }
- block
- })
- .collect::<Vec<Block>>();
- Ok(Self(data))
+ Ok(Self::new(&buffer))
}
#[inline]
@@ -197,4 +201,18 @@ mod tests {
assert!(sbbf.check(i));
}
}
+
+ #[test]
+ fn test_with_fixture() {
+ let bitset: &[u8] = &[
+ 200, 1, 80, 20, 64, 68, 8, 109, 6, 37, 4, 67, 144, 80, 96, 32, 8, 132, 43,
+ 33, 0, 5, 99, 65, 2, 0, 224, 44, 64, 78, 96, 4,
+ ];
+ let sbbf = Sbbf::new(bitset);
+ for a in 0..10i64 {
+ let value = format!("a{}", a);
+ let hash = hash_bytes(value);
+ assert!(sbbf.check(hash));
+ }
+ }
}