You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2022/11/09 12:51:50 UTC

[arrow-rs] 02/02: create new function to facilitate fixture test

This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch bloom-filter-reader
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit 01aed8377b4d9555405860b1bad1c6c3dbbf84f3
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Wed Nov 9 20:51:31 2022 +0800

    create new function to facilitate fixture test
---
 parquet/src/bloom_filter/mod.rs | 40 +++++++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/parquet/src/bloom_filter/mod.rs b/parquet/src/bloom_filter/mod.rs
index 9166eadc3..e32a1d348 100644
--- a/parquet/src/bloom_filter/mod.rs
+++ b/parquet/src/bloom_filter/mod.rs
@@ -85,6 +85,20 @@ fn block_check(block: &Block, hash: u32) -> bool {
 pub(crate) struct Sbbf(Vec<Block>);
 
 impl Sbbf {
+    fn new(bitset: &[u8]) -> Self {
+        let data = bitset
+            .chunks_exact(4 * 8)
+            .map(|chunk| {
+                let mut block = [0_u32; 8];
+                for (i, word) in chunk.chunks_exact(4).enumerate() {
+                    block[i] = u32::from_le_bytes(word.try_into().unwrap());
+                }
+                block
+            })
+            .collect::<Vec<Block>>();
+        Self(data)
+    }
+
     pub fn read_from_column_chunk<R: Read + Seek>(
         column_metadata: &ColumnChunkMetaData,
         mut reader: &mut R,
@@ -119,17 +133,7 @@ impl Sbbf {
         reader.read_exact(&mut buffer).map_err(|e| {
             ParquetError::General(format!("Could not read bloom filter: {}", e))
         })?;
-        let data = buffer
-            .chunks_exact(4 * 8)
-            .map(|chunk| {
-                let mut block = [0_u32; 8];
-                for (i, word) in chunk.chunks_exact(4).enumerate() {
-                    block[i] = u32::from_le_bytes(word.try_into().unwrap());
-                }
-                block
-            })
-            .collect::<Vec<Block>>();
-        Ok(Self(data))
+        Ok(Self::new(&buffer))
     }
 
     #[inline]
@@ -197,4 +201,18 @@ mod tests {
             assert!(sbbf.check(i));
         }
     }
+
+    #[test]
+    fn test_with_fixture() {
+        let bitset: &[u8] = &[
+            200, 1, 80, 20, 64, 68, 8, 109, 6, 37, 4, 67, 144, 80, 96, 32, 8, 132, 43,
+            33, 0, 5, 99, 65, 2, 0, 224, 44, 64, 78, 96, 4,
+        ];
+        let sbbf = Sbbf::new(bitset);
+        for a in 0..10i64 {
+            let value = format!("a{}", a);
+            let hash = hash_bytes(value);
+            assert!(sbbf.check(hash));
+        }
+    }
 }