You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2021/01/09 11:06:29 UTC

[arrow] branch master updated: ARROW-11131: [Rust] Improve performance of boolean_equal

This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new c72784d  ARROW-11131: [Rust] Improve performance of boolean_equal
c72784d is described below

commit c72784d370234f2fba68a0a6994c1a8dc073bccd
Author: mqy <me...@gmail.com>
AuthorDate: Sat Jan 9 12:05:22 2021 +0100

    ARROW-11131: [Rust] Improve performance of boolean_equal
    
    This PR optimized performance of `boolean_equal` and added benches for this.
    The average change is about -95% for both `equal_bool_512` and `equal_bool_513`.
    
    Closes #9124 from mqy/improve_boolean_equal
    
    Authored-by: mqy <me...@gmail.com>
    Signed-off-by: Jorge C. Leitao <jo...@gmail.com>
---
 rust/arrow/benches/equal.rs           | 18 ++++++++++++
 rust/arrow/src/array/equal/boolean.rs | 52 +++++++++++++++++++++++++----------
 rust/arrow/src/array/equal/mod.rs     | 14 ++++++++++
 3 files changed, 69 insertions(+), 15 deletions(-)

diff --git a/rust/arrow/benches/equal.rs b/rust/arrow/benches/equal.rs
index 6783662..519c49f 100644
--- a/rust/arrow/benches/equal.rs
+++ b/rust/arrow/benches/equal.rs
@@ -50,6 +50,18 @@ fn create_string_array(size: usize, with_nulls: bool) -> ArrayRef {
     Arc::new(builder.finish())
 }
 
+fn create_bool_array(size: usize) -> ArrayRef {
+    // use random numbers to avoid spurious compiler optimizations wrt to branching
+    let mut rng = seedable_rng();
+    let mut builder = BooleanBuilder::new(size);
+
+    for _ in 0..size {
+        let val = rng.gen::<i32>();
+        builder.append_value(val % 2 == 0).unwrap();
+    }
+    Arc::new(builder.finish())
+}
+
 fn create_array(size: usize, with_nulls: bool) -> ArrayRef {
     // use random numbers to avoid spurious compiler optimizations wrt to branching
     let mut rng = seedable_rng();
@@ -83,6 +95,12 @@ fn add_benchmark(c: &mut Criterion) {
     c.bench_function("equal_string_nulls_512", |b| {
         b.iter(|| bench_equal(&arr_a_nulls))
     });
+
+    let arr_a = create_bool_array(512);
+    c.bench_function("equal_bool_512", |b| b.iter(|| bench_equal(&arr_a)));
+
+    let arr_a = create_bool_array(513);
+    c.bench_function("equal_bool_513", |b| b.iter(|| bench_equal(&arr_a)));
 }
 
 criterion_group!(benches, add_benchmark);
diff --git a/rust/arrow/src/array/equal/boolean.rs b/rust/arrow/src/array/equal/boolean.rs
index ef241e8..e777d4d 100644
--- a/rust/arrow/src/array/equal/boolean.rs
+++ b/rust/arrow/src/array/equal/boolean.rs
@@ -19,16 +19,16 @@ use crate::array::{data::count_nulls, ArrayData};
 use crate::buffer::Buffer;
 use crate::util::bit_util::get_bit;
 
-use super::utils::equal_bits;
+use super::utils::{equal_bits, equal_len};
 
 pub(super) fn boolean_equal(
     lhs: &ArrayData,
     rhs: &ArrayData,
     lhs_nulls: Option<&Buffer>,
     rhs_nulls: Option<&Buffer>,
-    lhs_start: usize,
-    rhs_start: usize,
-    len: usize,
+    mut lhs_start: usize,
+    mut rhs_start: usize,
+    mut len: usize,
 ) -> bool {
     let lhs_values = lhs.buffers()[0].as_slice();
     let rhs_values = rhs.buffers()[0].as_slice();
@@ -37,18 +37,40 @@ pub(super) fn boolean_equal(
     let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
 
     if lhs_null_count == 0 && rhs_null_count == 0 {
-        (0..len).all(|i| {
-            let lhs_pos = lhs_start + i;
-            let rhs_pos = rhs_start + i;
+        // Optimize performance for starting offset at u8 boundary.
+        if lhs_start % 8 == 0 && rhs_start % 8 == 0 {
+            let quot = len / 8;
+            if quot > 0
+                && !equal_len(
+                    lhs_values,
+                    rhs_values,
+                    lhs_start / 8 + lhs.offset(),
+                    rhs_start / 8 + rhs.offset(),
+                    quot,
+                )
+            {
+                return false;
+            }
 
-            equal_bits(
-                lhs_values,
-                rhs_values,
-                lhs_pos + lhs.offset(),
-                rhs_pos + rhs.offset(),
-                1,
-            )
-        })
+            // Calculate for suffix bits.
+            let rem = len % 8;
+            if rem == 0 {
+                return true;
+            } else {
+                let aligned_bits = len - rem;
+                lhs_start += aligned_bits;
+                rhs_start += aligned_bits;
+                len = rem
+            }
+        }
+
+        equal_bits(
+            lhs_values,
+            rhs_values,
+            lhs_start + lhs.offset(),
+            rhs_start + rhs.offset(),
+            len,
+        )
     } else {
         // get a ref of the null buffer bytes, to use in testing for nullness
         let lhs_null_bytes = lhs_nulls.as_ref().unwrap().as_slice();
diff --git a/rust/arrow/src/array/equal/mod.rs b/rust/arrow/src/array/equal/mod.rs
index 33977b4..e2ee9bc 100644
--- a/rust/arrow/src/array/equal/mod.rs
+++ b/rust/arrow/src/array/equal/mod.rs
@@ -360,6 +360,20 @@ mod tests {
         let b_slice = b.slice(3, 4);
         assert_eq!(equal(&a_slice, &b_slice), false);
         assert_eq!(equal(&b_slice, &a_slice), false);
+
+        // Test the optimization cases where null_count == 0 and starts at 0 and len >= size_of(u8)
+
+        // Elements fill in `u8`'s exactly.
+        let mut vector = vec![false, false, true, true, true, true, true, true];
+        let a = BooleanArray::from(vector.clone()).data();
+        let b = BooleanArray::from(vector.clone()).data();
+        test_equal(a.as_ref(), b.as_ref(), true);
+
+        // Elements fill in `u8`s + suffix bits.
+        vector.push(true);
+        let a = BooleanArray::from(vector.clone()).data();
+        let b = BooleanArray::from(vector).data();
+        test_equal(a.as_ref(), b.as_ref(), true);
     }
 
     #[test]