You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/08/05 13:22:48 UTC

[arrow-rs] branch cherry_pick_e84fe205 created (now bcc6912)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a change to branch cherry_pick_e84fe205
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git.


      at bcc6912  Speed up filter_record_batch with one array (#637)

This branch includes the following new commits:

     new bcc6912  Speed up filter_record_batch with one array (#637)

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[arrow-rs] 01/01: Speed up filter_record_batch with one array (#637)

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch cherry_pick_e84fe205
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit bcc69129c2ee0b466cf3cd27799a9fdf0c4d5fdb
Author: Daniël Heres <da...@gmail.com>
AuthorDate: Fri Jul 30 21:30:33 2021 +0200

    Speed up filter_record_batch with one array (#637)
    
    * Speed up filter_record_batch with one array
    
    * Don't into()
---
 arrow/benches/filter_kernels.rs     | 19 +++++++++++++++++--
 arrow/src/compute/kernels/filter.rs | 21 +++++++++++++++------
 2 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/arrow/benches/filter_kernels.rs b/arrow/benches/filter_kernels.rs
index ca0b097..d5ff09c 100644
--- a/arrow/benches/filter_kernels.rs
+++ b/arrow/benches/filter_kernels.rs
@@ -16,12 +16,15 @@
 // under the License.
 extern crate arrow;
 
-use arrow::compute::Filter;
+use std::sync::Arc;
+
+use arrow::compute::{filter_record_batch, Filter};
+use arrow::record_batch::RecordBatch;
 use arrow::util::bench_util::*;
 
 use arrow::array::*;
 use arrow::compute::{build_filter, filter};
-use arrow::datatypes::{Float32Type, UInt8Type};
+use arrow::datatypes::{Field, Float32Type, Schema, UInt8Type};
 
 use criterion::{criterion_group, criterion_main, Criterion};
 
@@ -100,6 +103,18 @@ fn add_benchmark(c: &mut Criterion) {
     c.bench_function("filter context string low selectivity", |b| {
         b.iter(|| bench_built_filter(&sparse_filter, &data_array))
     });
+
+    let data_array = create_primitive_array::<Float32Type>(size, 0.0);
+
+    let field = Field::new("c1", data_array.data_type().clone(), true);
+    let schema = Schema::new(vec![field]);
+
+    let batch =
+        RecordBatch::try_new(Arc::new(schema), vec![Arc::new(data_array)]).unwrap();
+
+    c.bench_function("filter single record batch", |b| {
+        b.iter(|| filter_record_batch(&batch, &filter_array))
+    });
 }
 
 criterion_group!(benches, add_benchmark);
diff --git a/arrow/src/compute/kernels/filter.rs b/arrow/src/compute/kernels/filter.rs
index 075943c..55b1cd1 100644
--- a/arrow/src/compute/kernels/filter.rs
+++ b/arrow/src/compute/kernels/filter.rs
@@ -288,12 +288,21 @@ pub fn filter_record_batch(
         return filter_record_batch(record_batch, &predicate);
     }
 
-    let filter = build_filter(predicate)?;
-    let filtered_arrays = record_batch
-        .columns()
-        .iter()
-        .map(|a| make_array(filter(a.data())))
-        .collect();
+    let num_colums = record_batch.columns().len();
+
+    let filtered_arrays = match num_colums {
+        1 => {
+            vec![filter(record_batch.columns()[0].as_ref(), predicate)?]
+        }
+        _ => {
+            let filter = build_filter(predicate)?;
+            record_batch
+                .columns()
+                .iter()
+                .map(|a| make_array(filter(a.data())))
+                .collect()
+        }
+    };
     RecordBatch::try_new(record_batch.schema(), filtered_arrays)
 }