You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by si...@apache.org on 2019/02/12 20:01:35 UTC

[arrow] branch master updated: ARROW-4468: [Rust] Implement BitAnd/BitOr for &Buffer (with SIMD) (#3571)

This is an automated email from the ASF dual-hosted git repository.

siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 3ceef46  ARROW-4468: [Rust] Implement BitAnd/BitOr for &Buffer (with SIMD) (#3571)
3ceef46 is described below

commit 3ceef460086335e2346979c66c2b3a97c27c3d87
Author: Paddy Horan <pa...@hotmail.com>
AuthorDate: Tue Feb 12 15:01:30 2019 -0500

    ARROW-4468: [Rust] Implement BitAnd/BitOr for &Buffer (with SIMD) (#3571)
    
    * Added (SIMD) bitwise AND/OR for `Buffer`
    
    * Added benchmark
    
    * Removed boolean kernels in favor of traits.
    
    * Updated typo.
    
    * Cleaned up code.
    
    * Updated benchmark to include bitwise OR op
---
 rust/arrow/Cargo.toml             |   7 ++-
 rust/arrow/benches/bitwise_ops.rs |  75 +++++++++++++++++++++++
 rust/arrow/src/buffer.rs          | 121 ++++++++++++++++++++++++++++++++++++++
 rust/arrow/src/util/bit_util.rs   |  42 +++++++++++++
 4 files changed, 244 insertions(+), 1 deletion(-)

diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml
index 1ebd4e6..6e2d483 100644
--- a/rust/arrow/Cargo.toml
+++ b/rust/arrow/Cargo.toml
@@ -45,6 +45,7 @@ csv = "1.0.0"
 num = "0.2"
 regex = "1.1"
 lazy_static = "1.2"
+packed_simd = "0.3.1"
 
 [dev-dependencies]
 criterion = "0.2"
@@ -56,4 +57,8 @@ harness = false
 
 [[bench]]
 name = "builder"
-harness = false
\ No newline at end of file
+harness = false
+
+[[bench]]
+name = "bitwise_ops"
+harness = false
diff --git a/rust/arrow/benches/bitwise_ops.rs b/rust/arrow/benches/bitwise_ops.rs
new file mode 100644
index 0000000..434ff4d
--- /dev/null
+++ b/rust/arrow/benches/bitwise_ops.rs
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#[macro_use]
+extern crate criterion;
+use criterion::Criterion;
+
+extern crate arrow;
+
+use arrow::buffer::Buffer;
+use arrow::builder::{BufferBuilderTrait, UInt8BufferBuilder};
+
+fn create_buffer(size: usize) -> Buffer {
+    let mut builder = UInt8BufferBuilder::new(size);
+    for _i in 0..size {
+        builder.append(1_u8).unwrap();
+    }
+    builder.finish()
+}
+
+fn bitwise_default<F>(size: usize, op: F)
+where
+    F: Fn(&u8, &u8) -> u8,
+{
+    let buffer_a = create_buffer(size);
+    let buffer_b = create_buffer(size);
+
+    criterion::black_box({
+        let mut builder = UInt8BufferBuilder::new(buffer_a.len());
+        for i in 0..buffer_a.len() {
+            unsafe {
+                builder
+                    .append(op(
+                        buffer_a.data().get_unchecked(i),
+                        buffer_b.data().get_unchecked(i),
+                    ))
+                    .unwrap();
+            }
+        }
+        builder.finish()
+    });
+}
+
+fn bitwise_simd<F>(size: usize, op: F)
+where
+    F: Fn(&Buffer, &Buffer) -> Buffer,
+{
+    let buffer_a = create_buffer(size);
+    let buffer_b = create_buffer(size);
+    criterion::black_box(op(&buffer_a, &buffer_b));
+}
+
+fn add_benchmark(c: &mut Criterion) {
+    c.bench_function("add", |b| b.iter(|| bitwise_default(512, |a, b| a & b)));
+    c.bench_function("add simd", |b| b.iter(|| bitwise_simd(512, |a, b| a & b)));
+    c.bench_function("or", |b| b.iter(|| bitwise_default(512, |a, b| a | b)));
+    c.bench_function("or simd", |b| b.iter(|| bitwise_simd(512, |a, b| a | b)));
+}
+
+criterion_group!(benches, add_benchmark);
+criterion_main!(benches);
diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs
index 70aea63..447a575 100644
--- a/rust/arrow/src/buffer.rs
+++ b/rust/arrow/src/buffer.rs
@@ -18,12 +18,17 @@
 //! The main type in the module is `Buffer`, a contiguous immutable memory region of
 //! fixed size aligned at a 64-byte boundary. `MutableBuffer` is like `Buffer`, but it can
 //! be mutated and grown.
+//!
+use packed_simd::u8x64;
 
 use std::cmp;
 use std::io::{Error as IoError, ErrorKind, Result as IoResult, Write};
 use std::mem;
+use std::ops::{BitAnd, BitOr};
+use std::slice::{from_raw_parts, from_raw_parts_mut};
 use std::sync::Arc;
 
+use crate::builder::{BufferBuilderTrait, UInt8BufferBuilder};
 use crate::error::Result;
 use crate::memory;
 use crate::util::bit_util;
@@ -141,6 +146,100 @@ impl<T: AsRef<[u8]>> From<T> for Buffer {
     }
 }
 
+///  Helper function for SIMD `BitAnd` and `BitOr` implementations
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn bitwise_bin_op_simd_helper<F>(left: &Buffer, right: &Buffer, op: F) -> Buffer
+where
+    F: Fn(u8x64, u8x64) -> u8x64,
+{
+    let mut result = MutableBuffer::new(left.len()).with_bitset(left.len(), false);
+    let lanes = u8x64::lanes();
+    for i in (0..left.len()).step_by(lanes) {
+        let left_data =
+            unsafe { from_raw_parts(left.raw_data().offset(i as isize), lanes) };
+        let right_data =
+            unsafe { from_raw_parts(right.raw_data().offset(i as isize), lanes) };
+        let result_slice: &mut [u8] = unsafe {
+            from_raw_parts_mut(
+                (result.data_mut().as_mut_ptr() as *mut u8).offset(i as isize),
+                lanes,
+            )
+        };
+        unsafe {
+            bit_util::bitwise_bin_op_simd(&left_data, &right_data, result_slice, &op)
+        };
+    }
+    return result.freeze();
+}
+
+impl<'a, 'b> BitAnd<&'b Buffer> for &'a Buffer {
+    type Output = Buffer;
+
+    fn bitand(self, rhs: &'b Buffer) -> Buffer {
+        assert_eq!(
+            self.len(),
+            rhs.len(),
+            "Buffers must be the same size to apply Bitwise AND."
+        );
+
+        // SIMD implementation if available
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        {
+            return bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a & b);
+        }
+
+        // Default implementation
+        #[allow(unreachable_code)]
+        {
+            let mut builder = UInt8BufferBuilder::new(self.len());
+            for i in 0..self.len() {
+                unsafe {
+                    builder
+                        .append(
+                            self.data().get_unchecked(i) & rhs.data().get_unchecked(i),
+                        )
+                        .unwrap();
+                }
+            }
+            builder.finish()
+        }
+    }
+}
+
+impl<'a, 'b> BitOr<&'b Buffer> for &'a Buffer {
+    type Output = Buffer;
+
+    fn bitor(self, rhs: &'b Buffer) -> Buffer {
+        assert_eq!(
+            self.len(),
+            rhs.len(),
+            "Buffers must be the same size to apply Bitwise OR."
+        );
+
+        // SIMD implementation if available
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        {
+            return bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a | b);
+        }
+
+        // Default implementation
+        #[allow(unreachable_code)]
+        {
+            let mut builder = UInt8BufferBuilder::new(self.len());
+            for i in 0..self.len() {
+                unsafe {
+                    builder
+                        .append(
+                            self.data().get_unchecked(i) | rhs.data().get_unchecked(i),
+                        )
+                        .unwrap();
+                }
+            }
+            builder.finish()
+        }
+    }
+}
+
 unsafe impl Sync for Buffer {}
 unsafe impl Send for Buffer {}
 
@@ -435,6 +534,28 @@ mod tests {
     }
 
     #[test]
+    fn test_bitwise_and() {
+        let buf1 = Buffer::from([0b01101010]);
+        let buf2 = Buffer::from([0b01001110]);
+        assert_eq!(Buffer::from([0b01001010]), &buf1 & &buf2);
+    }
+
+    #[test]
+    fn test_bitwise_or() {
+        let buf1 = Buffer::from([0b01101010]);
+        let buf2 = Buffer::from([0b01001110]);
+        assert_eq!(Buffer::from([0b01101110]), &buf1 | &buf2);
+    }
+
+    #[test]
+    #[should_panic(expected = "Buffers must be the same size to apply Bitwise OR.")]
+    fn test_buffer_bitand_different_sizes() {
+        let buf1 = Buffer::from([1_u8, 1_u8]);
+        let buf2 = Buffer::from([0b01001110]);
+        let _buf3 = &buf1 | &buf2;
+    }
+
+    #[test]
     fn test_mutable_new() {
         let buf = MutableBuffer::new(63);
         assert_eq!(64, buf.capacity());
diff --git a/rust/arrow/src/util/bit_util.rs b/rust/arrow/src/util/bit_util.rs
index 4674783..23bc3d5 100644
--- a/rust/arrow/src/util/bit_util.rs
+++ b/rust/arrow/src/util/bit_util.rs
@@ -17,6 +17,8 @@
 
 //! Utils for working with bits
 
+use packed_simd::u8x64;
+
 static BIT_MASK: [u8; 8] = [1, 2, 4, 8, 16, 32, 64, 128];
 
 static POPCOUNT_TABLE: [u8; 256] = [
@@ -117,6 +119,22 @@ pub fn ceil(value: usize, divisor: usize) -> usize {
     result
 }
 
+/// Performs SIMD bitwise binary operations.
+///
+/// Note that each slice should be 64 bytes and it is the callers responsibility to ensure that
+/// this is the case.  If passed slices larger than 64 bytes the operation will only be performed
+/// on the first 64 bytes.  Slices less than 64 bytes will panic.
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+pub unsafe fn bitwise_bin_op_simd<F>(left: &[u8], right: &[u8], result: &mut [u8], op: F)
+where
+    F: Fn(u8x64, u8x64) -> u8x64,
+{
+    let left_simd = u8x64::from_slice_unaligned_unchecked(left);
+    let right_simd = u8x64::from_slice_unaligned_unchecked(right);
+    let simd_result = op(left_simd, right_simd);
+    simd_result.write_to_slice_unaligned_unchecked(result);
+}
+
 #[cfg(test)]
 mod tests {
     use rand::{thread_rng, Rng};
@@ -270,4 +288,28 @@ mod tests {
         assert_eq!(ceil(10, 10000000000), 1);
         assert_eq!(ceil(10000000000, 1000000000), 10);
     }
+
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    #[test]
+    fn test_bitwise_and_simd() {
+        let buf1 = [0b00110011u8; 64];
+        let buf2 = [0b11110000u8; 64];
+        let mut buf3 = [0b00000000; 64];
+        unsafe { bitwise_bin_op_simd(&buf1, &buf2, &mut buf3, |a, b| a & b) };
+        for i in buf3.iter() {
+            assert_eq!(&0b00110000u8, i);
+        }
+    }
+
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    #[test]
+    fn test_bitwise_or_simd() {
+        let buf1 = [0b00110011u8; 64];
+        let buf2 = [0b11110000u8; 64];
+        let mut buf3 = [0b00000000; 64];
+        unsafe { bitwise_bin_op_simd(&buf1, &buf2, &mut buf3, |a, b| a | b) };
+        for i in buf3.iter() {
+            assert_eq!(&0b11110011u8, i);
+        }
+    }
 }