You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by su...@apache.org on 2019/03/05 17:57:09 UTC
[arrow] branch master updated: ARROW-4769: [Rust] Improve array
limit fn where max_records >= len
This is an automated email from the ASF dual-hosted git repository.
sunchao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 22cdd42 ARROW-4769: [Rust] Improve array limit fn where max_records >= len
22cdd42 is described below
commit 22cdd4252371218ee254445ba8b69fea719cc16b
Author: Neville Dipale <ne...@gmail.com>
AuthorDate: Tue Mar 5 09:56:55 2019 -0800
ARROW-4769: [Rust] Improve array limit fn where max_records >= len
This yields a ~55% reduction in runtime.
Author: Neville Dipale <ne...@gmail.com>
Closes #3811 from nevi-me/ARROW-4769 and squashes the following commits:
6cfdfe41 <Neville Dipale> ARROW-4769: Improve array limit fn where max_records >= len
---
rust/arrow/benches/arithmetic_kernels.rs | 15 +++++++-
rust/arrow/src/compute/array_ops.rs | 59 +++++++++++++++++---------------
rust/datafusion/src/execution/limit.rs | 2 +-
3 files changed, 47 insertions(+), 29 deletions(-)
diff --git a/rust/arrow/benches/arithmetic_kernels.rs b/rust/arrow/benches/arithmetic_kernels.rs
index be6d0ae..dd1c435 100644
--- a/rust/arrow/benches/arithmetic_kernels.rs
+++ b/rust/arrow/benches/arithmetic_kernels.rs
@@ -19,12 +19,14 @@
extern crate criterion;
use criterion::Criterion;
+use std::sync::Arc;
+
extern crate arrow;
use arrow::array::*;
use arrow::builder::*;
use arrow::compute::arithmetic_kernels::*;
-use arrow::compute::array_ops::sum;
+use arrow::compute::array_ops::{limit, sum};
use arrow::error::Result;
fn create_array(size: usize) -> Float32Array {
@@ -67,6 +69,11 @@ fn sum_no_simd(size: usize) {
criterion::black_box(sum(&arr_a).unwrap());
}
+fn limit_no_simd(size: usize, max: usize) {
+ let arr_a: ArrayRef = Arc::new(create_array(size));
+ criterion::black_box(limit(&arr_a, max).unwrap());
+}
+
fn add_benchmark(c: &mut Criterion) {
c.bench_function("add 512", |b| {
b.iter(|| bin_op_no_simd(512, |a, b| Ok(a + b)))
@@ -81,6 +88,12 @@ fn add_benchmark(c: &mut Criterion) {
});
c.bench_function("multiply 512 simd", |b| b.iter(|| multiply_simd(512)));
c.bench_function("sum 512 no simd", |b| b.iter(|| sum_no_simd(512)));
+ c.bench_function("limit 512, 256 no simd", |b| {
+ b.iter(|| limit_no_simd(512, 256))
+ });
+ c.bench_function("limit 512, 512 no simd", |b| {
+ b.iter(|| limit_no_simd(512, 512))
+ });
}
criterion_group!(benches, add_benchmark);
diff --git a/rust/arrow/src/compute/array_ops.rs b/rust/arrow/src/compute/array_ops.rs
index dc1730f..088661d 100644
--- a/rust/arrow/src/compute/array_ops.rs
+++ b/rust/arrow/src/compute/array_ops.rs
@@ -17,7 +17,6 @@
//! Defines primitive computations on arrays, e.g. addition, equality, boolean logic.
-use std::cmp;
use std::ops::Add;
use std::sync::Arc;
@@ -201,26 +200,29 @@ macro_rules! limit_array {
/// Returns the array, taking only the number of elements specified
///
-/// Returns the whole array if the number of elements specified is larger than the length of the array
-pub fn limit(array: &Array, num_elements: usize) -> Result<ArrayRef> {
- let num_elements_safe: usize = cmp::min(array.len(), num_elements);
+/// Returns the whole array if the number of elements specified is larger than the length
+/// of the array
+pub fn limit(array: &ArrayRef, num_elements: usize) -> Result<ArrayRef> {
+ if num_elements >= array.len() {
+ return Ok(array.clone());
+ }
match array.data_type() {
- DataType::UInt8 => limit_array!(array, num_elements_safe, UInt8Array),
- DataType::UInt16 => limit_array!(array, num_elements_safe, UInt16Array),
- DataType::UInt32 => limit_array!(array, num_elements_safe, UInt32Array),
- DataType::UInt64 => limit_array!(array, num_elements_safe, UInt64Array),
- DataType::Int8 => limit_array!(array, num_elements_safe, Int8Array),
- DataType::Int16 => limit_array!(array, num_elements_safe, Int16Array),
- DataType::Int32 => limit_array!(array, num_elements_safe, Int32Array),
- DataType::Int64 => limit_array!(array, num_elements_safe, Int64Array),
- DataType::Float32 => limit_array!(array, num_elements_safe, Float32Array),
- DataType::Float64 => limit_array!(array, num_elements_safe, Float64Array),
- DataType::Boolean => limit_array!(array, num_elements_safe, BooleanArray),
+ DataType::UInt8 => limit_array!(array, num_elements, UInt8Array),
+ DataType::UInt16 => limit_array!(array, num_elements, UInt16Array),
+ DataType::UInt32 => limit_array!(array, num_elements, UInt32Array),
+ DataType::UInt64 => limit_array!(array, num_elements, UInt64Array),
+ DataType::Int8 => limit_array!(array, num_elements, Int8Array),
+ DataType::Int16 => limit_array!(array, num_elements, Int16Array),
+ DataType::Int32 => limit_array!(array, num_elements, Int32Array),
+ DataType::Int64 => limit_array!(array, num_elements, Int64Array),
+ DataType::Float32 => limit_array!(array, num_elements, Float32Array),
+ DataType::Float64 => limit_array!(array, num_elements, Float64Array),
+ DataType::Boolean => limit_array!(array, num_elements, BooleanArray),
DataType::Utf8 => {
let b = array.as_any().downcast_ref::<BinaryArray>().unwrap();
- let mut values: Vec<&[u8]> = Vec::with_capacity(num_elements_safe);
- for i in 0..num_elements_safe {
+ let mut values: Vec<&[u8]> = Vec::with_capacity(num_elements);
+ for i in 0..num_elements {
values.push(b.value(i));
}
Ok(Arc::new(BinaryArray::from(values)))
@@ -235,7 +237,9 @@ pub fn limit(array: &Array, num_elements: usize) -> Result<ArrayRef> {
#[cfg(test)]
mod tests {
use super::*;
- use crate::array::{Float64Array, Int32Array};
+ use crate::array::{ArrayRef, Float64Array, Int32Array};
+
+ use std::sync::Arc;
#[test]
fn test_primitive_array_sum() {
@@ -309,7 +313,7 @@ mod tests {
#[test]
fn test_limit_array() {
- let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
+ let a: ArrayRef = Arc::new(Int32Array::from(vec![5, 6, 7, 8, 9]));
let b = limit(&a, 3).unwrap();
let c = b.as_ref().as_any().downcast_ref::<Int32Array>().unwrap();
assert_eq!(3, c.len());
@@ -320,7 +324,7 @@ mod tests {
#[test]
fn test_limit_binary_array() {
- let a = BinaryArray::from(vec!["hello", " ", "world", "!"]);
+ let a: ArrayRef = Arc::new(BinaryArray::from(vec!["hello", " ", "world", "!"]));
let b = limit(&a, 2).unwrap();
let c = b.as_ref().as_any().downcast_ref::<BinaryArray>().unwrap();
assert_eq!(2, c.len());
@@ -330,7 +334,7 @@ mod tests {
#[test]
fn test_limit_array_with_null() {
- let a = Int32Array::from(vec![None, Some(5)]);
+ let a: ArrayRef = Arc::new(Int32Array::from(vec![None, Some(5)]));
let b = limit(&a, 1).unwrap();
let c = b.as_ref().as_any().downcast_ref::<Int32Array>().unwrap();
assert_eq!(1, c.len());
@@ -340,14 +344,15 @@ mod tests {
#[test]
fn test_limit_array_with_limit_too_large() {
let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
- let b = limit(&a, 6).unwrap();
+ let a_ref: ArrayRef = Arc::new(a);
+ let b = limit(&a_ref, 6).unwrap();
let c = b.as_ref().as_any().downcast_ref::<Int32Array>().unwrap();
assert_eq!(5, c.len());
- assert_eq!(a.value(0), c.value(0));
- assert_eq!(a.value(1), c.value(1));
- assert_eq!(a.value(2), c.value(2));
- assert_eq!(a.value(3), c.value(3));
- assert_eq!(a.value(4), c.value(4));
+ assert_eq!(5, c.value(0));
+ assert_eq!(6, c.value(1));
+ assert_eq!(7, c.value(2));
+ assert_eq!(8, c.value(3));
+ assert_eq!(9, c.value(4));
}
}
diff --git a/rust/datafusion/src/execution/limit.rs b/rust/datafusion/src/execution/limit.rs
index 888fac5..bfd8706 100644
--- a/rust/datafusion/src/execution/limit.rs
+++ b/rust/datafusion/src/execution/limit.rs
@@ -59,7 +59,7 @@ impl Relation for LimitRelation {
if batch.num_rows() >= capacity {
let limited_columns: Result<Vec<ArrayRef>> = (0..batch.num_columns())
- .map(|i| match limit(batch.column(i).as_ref(), capacity) {
+ .map(|i| match limit(batch.column(i), capacity) {
Ok(result) => Ok(result),
Err(error) => Err(ExecutionError::from(error)),
})