You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by dh...@apache.org on 2021/07/30 19:35:19 UTC

[arrow-rs] branch master updated: Speed up filter_record_batch with one array (#637)

This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new e84fe20  Speed up filter_record_batch with one array (#637)
e84fe20 is described below

commit e84fe2050fb6c898f0c963230da22876ca3c018f
Author: Daniël Heres <da...@gmail.com>
AuthorDate: Fri Jul 30 21:30:33 2021 +0200

    Speed up filter_record_batch with one array (#637)
    
    * Speed up filter_record_batch with one array
    
    * Don't into()
---
 arrow/benches/filter_kernels.rs     | 19 +++++++++++++++++--
 arrow/src/compute/kernels/filter.rs | 21 +++++++++++++++------
 2 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/arrow/benches/filter_kernels.rs b/arrow/benches/filter_kernels.rs
index ca0b097..d5ff09c 100644
--- a/arrow/benches/filter_kernels.rs
+++ b/arrow/benches/filter_kernels.rs
@@ -16,12 +16,15 @@
 // under the License.
 extern crate arrow;
 
-use arrow::compute::Filter;
+use std::sync::Arc;
+
+use arrow::compute::{filter_record_batch, Filter};
+use arrow::record_batch::RecordBatch;
 use arrow::util::bench_util::*;
 
 use arrow::array::*;
 use arrow::compute::{build_filter, filter};
-use arrow::datatypes::{Float32Type, UInt8Type};
+use arrow::datatypes::{Field, Float32Type, Schema, UInt8Type};
 
 use criterion::{criterion_group, criterion_main, Criterion};
 
@@ -100,6 +103,18 @@ fn add_benchmark(c: &mut Criterion) {
     c.bench_function("filter context string low selectivity", |b| {
         b.iter(|| bench_built_filter(&sparse_filter, &data_array))
     });
+
+    let data_array = create_primitive_array::<Float32Type>(size, 0.0);
+
+    let field = Field::new("c1", data_array.data_type().clone(), true);
+    let schema = Schema::new(vec![field]);
+
+    let batch =
+        RecordBatch::try_new(Arc::new(schema), vec![Arc::new(data_array)]).unwrap();
+
+    c.bench_function("filter single record batch", |b| {
+        b.iter(|| filter_record_batch(&batch, &filter_array))
+    });
 }
 
 criterion_group!(benches, add_benchmark);
diff --git a/arrow/src/compute/kernels/filter.rs b/arrow/src/compute/kernels/filter.rs
index 075943c..55b1cd1 100644
--- a/arrow/src/compute/kernels/filter.rs
+++ b/arrow/src/compute/kernels/filter.rs
@@ -288,12 +288,21 @@ pub fn filter_record_batch(
         return filter_record_batch(record_batch, &predicate);
     }
 
-    let filter = build_filter(predicate)?;
-    let filtered_arrays = record_batch
-        .columns()
-        .iter()
-        .map(|a| make_array(filter(a.data())))
-        .collect();
+    let num_colums = record_batch.columns().len();
+
+    let filtered_arrays = match num_colums {
+        1 => {
+            vec![filter(record_batch.columns()[0].as_ref(), predicate)?]
+        }
+        _ => {
+            let filter = build_filter(predicate)?;
+            record_batch
+                .columns()
+                .iter()
+                .map(|a| make_array(filter(a.data())))
+                .collect()
+        }
+    };
     RecordBatch::try_new(record_batch.schema(), filtered_arrays)
 }