You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2022/12/10 17:41:44 UTC

[arrow-rs] branch master updated: Add bloom filter benchmark (#3323)

This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new ad94368a7 Add bloom filter benchmark (#3323)
ad94368a7 is described below

commit ad94368a722ca8d78a47f929a90775b669421691
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Sat Dec 10 09:41:37 2022 -0800

    Add bloom filter benchmark (#3323)
---
 parquet/benches/arrow_writer.rs | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/parquet/benches/arrow_writer.rs b/parquet/benches/arrow_writer.rs
index ddca1e53c..676debf5c 100644
--- a/parquet/benches/arrow_writer.rs
+++ b/parquet/benches/arrow_writer.rs
@@ -26,6 +26,7 @@ use std::sync::Arc;
 
 use arrow::datatypes::*;
 use arrow::{record_batch::RecordBatch, util::data_gen::*};
+use parquet::file::properties::WriterProperties;
 use parquet::{arrow::ArrowWriter, errors::Result};
 
 fn create_primitive_bench_batch(
@@ -294,9 +295,26 @@ fn _create_nested_bench_batch(
 
 #[inline]
 fn write_batch(batch: &RecordBatch) -> Result<()> {
+    write_batch_with_option(batch, None)
+}
+
+#[inline]
+fn write_batch_enable_bloom_filter(batch: &RecordBatch) -> Result<()> {
+    let option = WriterProperties::builder()
+        .set_bloom_filter_enabled(true)
+        .build();
+
+    write_batch_with_option(batch, Some(option))
+}
+
+#[inline]
+fn write_batch_with_option(
+    batch: &RecordBatch,
+    props: Option<WriterProperties>,
+) -> Result<()> {
     // Write batch to an in-memory writer
     let buffer = vec![];
-    let mut writer = ArrowWriter::try_new(buffer, batch.schema(), None)?;
+    let mut writer = ArrowWriter::try_new(buffer, batch.schema(), props)?;
 
     writer.write(batch)?;
     writer.close()?;
@@ -317,6 +335,10 @@ fn bench_primitive_writer(c: &mut Criterion) {
         b.iter(|| write_batch(&batch).unwrap())
     });
 
+    group.bench_function("4096 values primitive with bloom filter", |b| {
+        b.iter(|| write_batch_enable_bloom_filter(&batch).unwrap())
+    });
+
     let batch = create_primitive_bench_batch_non_null(4096, 0.25, 0.75).unwrap();
     group.throughput(Throughput::Bytes(
         batch
@@ -329,6 +351,10 @@ fn bench_primitive_writer(c: &mut Criterion) {
         b.iter(|| write_batch(&batch).unwrap())
     });
 
+    group.bench_function("4096 values primitive non-null with bloom filter", |b| {
+        b.iter(|| write_batch_enable_bloom_filter(&batch).unwrap())
+    });
+
     let batch = create_bool_bench_batch(4096, 0.25, 0.75).unwrap();
     group.throughput(Throughput::Bytes(
         batch
@@ -365,6 +391,10 @@ fn bench_primitive_writer(c: &mut Criterion) {
         b.iter(|| write_batch(&batch).unwrap())
     });
 
+    group.bench_function("4096 values string with bloom filter", |b| {
+        b.iter(|| write_batch_enable_bloom_filter(&batch).unwrap())
+    });
+
     let batch = create_string_dictionary_bench_batch(4096, 0.25, 0.75).unwrap();
     group.throughput(Throughput::Bytes(
         batch
@@ -377,6 +407,10 @@ fn bench_primitive_writer(c: &mut Criterion) {
         b.iter(|| write_batch(&batch).unwrap())
     });
 
+    group.bench_function("4096 values string dictionary with bloom filter", |b| {
+        b.iter(|| write_batch_enable_bloom_filter(&batch).unwrap())
+    });
+
     let batch = create_string_bench_batch_non_null(4096, 0.25, 0.75).unwrap();
     group.throughput(Throughput::Bytes(
         batch
@@ -389,6 +423,10 @@ fn bench_primitive_writer(c: &mut Criterion) {
         b.iter(|| write_batch(&batch).unwrap())
     });
 
+    group.bench_function("4096 values string non-null with bloom filter", |b| {
+        b.iter(|| write_batch_enable_bloom_filter(&batch).unwrap())
+    });
+
     group.finish();
 }