You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by dh...@apache.org on 2021/07/30 19:35:19 UTC
[arrow-rs] branch master updated: Speed up filter_record_batch with one array (#637)
This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new e84fe20 Speed up filter_record_batch with one array (#637)
e84fe20 is described below
commit e84fe2050fb6c898f0c963230da22876ca3c018f
Author: Daniël Heres <da...@gmail.com>
AuthorDate: Fri Jul 30 21:30:33 2021 +0200
Speed up filter_record_batch with one array (#637)
* Speed up filter_record_batch with one array
* Don't into()
---
arrow/benches/filter_kernels.rs | 19 +++++++++++++++++--
arrow/src/compute/kernels/filter.rs | 21 +++++++++++++++------
2 files changed, 32 insertions(+), 8 deletions(-)
diff --git a/arrow/benches/filter_kernels.rs b/arrow/benches/filter_kernels.rs
index ca0b097..d5ff09c 100644
--- a/arrow/benches/filter_kernels.rs
+++ b/arrow/benches/filter_kernels.rs
@@ -16,12 +16,15 @@
// under the License.
extern crate arrow;
-use arrow::compute::Filter;
+use std::sync::Arc;
+
+use arrow::compute::{filter_record_batch, Filter};
+use arrow::record_batch::RecordBatch;
use arrow::util::bench_util::*;
use arrow::array::*;
use arrow::compute::{build_filter, filter};
-use arrow::datatypes::{Float32Type, UInt8Type};
+use arrow::datatypes::{Field, Float32Type, Schema, UInt8Type};
use criterion::{criterion_group, criterion_main, Criterion};
@@ -100,6 +103,18 @@ fn add_benchmark(c: &mut Criterion) {
c.bench_function("filter context string low selectivity", |b| {
b.iter(|| bench_built_filter(&sparse_filter, &data_array))
});
+
+ let data_array = create_primitive_array::<Float32Type>(size, 0.0);
+
+ let field = Field::new("c1", data_array.data_type().clone(), true);
+ let schema = Schema::new(vec![field]);
+
+ let batch =
+ RecordBatch::try_new(Arc::new(schema), vec![Arc::new(data_array)]).unwrap();
+
+ c.bench_function("filter single record batch", |b| {
+ b.iter(|| filter_record_batch(&batch, &filter_array))
+ });
}
criterion_group!(benches, add_benchmark);
diff --git a/arrow/src/compute/kernels/filter.rs b/arrow/src/compute/kernels/filter.rs
index 075943c..55b1cd1 100644
--- a/arrow/src/compute/kernels/filter.rs
+++ b/arrow/src/compute/kernels/filter.rs
@@ -288,12 +288,21 @@ pub fn filter_record_batch(
return filter_record_batch(record_batch, &predicate);
}
- let filter = build_filter(predicate)?;
- let filtered_arrays = record_batch
- .columns()
- .iter()
- .map(|a| make_array(filter(a.data())))
- .collect();
+ let num_colums = record_batch.columns().len();
+
+ let filtered_arrays = match num_colums {
+ 1 => {
+ vec![filter(record_batch.columns()[0].as_ref(), predicate)?]
+ }
+ _ => {
+ let filter = build_filter(predicate)?;
+ record_batch
+ .columns()
+ .iter()
+ .map(|a| make_array(filter(a.data())))
+ .collect()
+ }
+ };
RecordBatch::try_new(record_batch.schema(), filtered_arrays)
}