You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/10/29 17:52:55 UTC
[arrow-rs] branch master updated: Move concat kernel to arrow-select (#2594) (#2976)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 999a6ae28 Move concat kernel to arrow-select (#2594) (#2976)
999a6ae28 is described below
commit 999a6ae28524aedf57a0efb91640bc17bb7a5c7c
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Sun Oct 30 06:52:48 2022 +1300
Move concat kernel to arrow-select (#2594) (#2976)
---
.../compute/kernels => arrow-select/src}/concat.rs | 90 +++++++++-------------
arrow-select/src/lib.rs | 1 +
arrow/src/compute/kernels/mod.rs | 3 +-
3 files changed, 40 insertions(+), 54 deletions(-)
diff --git a/arrow/src/compute/kernels/concat.rs b/arrow-select/src/concat.rs
similarity index 93%
rename from arrow/src/compute/kernels/concat.rs
rename to arrow-select/src/concat.rs
index b6edf8c99..a1bb64be5 100644
--- a/arrow/src/compute/kernels/concat.rs
+++ b/arrow-select/src/concat.rs
@@ -20,8 +20,8 @@
//! Example:
//!
//! ```
-//! use arrow::array::{ArrayRef, StringArray};
-//! use arrow::compute::concat;
+//! use arrow_array::{ArrayRef, StringArray};
+//! use arrow_select::concat::concat;
//!
//! let arr = concat(&[
//! &StringArray::from(vec!["hello", "world"]),
@@ -30,10 +30,10 @@
//! assert_eq!(arr.len(), 3);
//! ```
-use crate::array::*;
-use crate::datatypes::{DataType, SchemaRef};
-use crate::error::{ArrowError, Result};
-use crate::record_batch::RecordBatch;
+use arrow_array::*;
+use arrow_data::transform::{Capacities, MutableArrayData};
+use arrow_data::ArrayData;
+use arrow_schema::{ArrowError, DataType, SchemaRef};
fn compute_str_values_length<Offset: OffsetSizeTrait>(arrays: &[&ArrayData]) -> usize {
arrays
@@ -51,7 +51,7 @@ fn compute_str_values_length<Offset: OffsetSizeTrait>(arrays: &[&ArrayData]) ->
}
/// Concatenate multiple [Array] of the same type into a single [ArrayRef].
-pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef> {
+pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
if arrays.is_empty() {
return Err(ArrowError::ComputeError(
"concat requires input of at least one array".to_string(),
@@ -107,7 +107,7 @@ pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef> {
pub fn concat_batches(
schema: &SchemaRef,
batches: &[RecordBatch],
-) -> Result<RecordBatch> {
+) -> Result<RecordBatch, ArrowError> {
if batches.is_empty() {
return Ok(RecordBatch::new_empty(schema.clone()));
}
@@ -138,7 +138,8 @@ pub fn concat_batches(
#[cfg(test)]
mod tests {
use super::*;
- use crate::datatypes::*;
+ use arrow_array::types::*;
+ use arrow_schema::{Field, Schema};
use std::sync::Arc;
#[test]
@@ -148,18 +149,17 @@ mod tests {
}
#[test]
- fn test_concat_one_element_vec() -> Result<()> {
+ fn test_concat_one_element_vec() {
let arr = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
Some(2),
None,
])) as ArrayRef;
- let result = concat(&[arr.as_ref()])?;
+ let result = concat(&[arr.as_ref()]).unwrap();
assert_eq!(
&arr, &result,
"concatenating single element array gives back the same result"
);
- Ok(())
}
#[test]
@@ -172,12 +172,13 @@ mod tests {
}
#[test]
- fn test_concat_string_arrays() -> Result<()> {
+ fn test_concat_string_arrays() {
let arr = concat(&[
&StringArray::from(vec!["hello", "world"]),
&StringArray::from(vec!["2", "3", "4"]),
&StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]),
- ])?;
+ ])
+ .unwrap();
let expected_output = Arc::new(StringArray::from(vec![
Some("hello"),
@@ -192,12 +193,10 @@ mod tests {
])) as ArrayRef;
assert_eq!(&arr, &expected_output);
-
- Ok(())
}
#[test]
- fn test_concat_primitive_arrays() -> Result<()> {
+ fn test_concat_primitive_arrays() {
let arr = concat(&[
&PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
@@ -213,7 +212,8 @@ mod tests {
None,
]),
&PrimitiveArray::<Int64Type>::from(vec![Some(256), Some(512), Some(1024)]),
- ])?;
+ ])
+ .unwrap();
let expected_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
@@ -231,12 +231,10 @@ mod tests {
])) as ArrayRef;
assert_eq!(&arr, &expected_output);
-
- Ok(())
}
#[test]
- fn test_concat_primitive_array_slices() -> Result<()> {
+ fn test_concat_primitive_array_slices() {
let input_1 = PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
Some(-1),
@@ -253,7 +251,7 @@ mod tests {
None,
])
.slice(1, 3);
- let arr = concat(&[input_1.as_ref(), input_2.as_ref()])?;
+ let arr = concat(&[input_1.as_ref(), input_2.as_ref()]).unwrap();
let expected_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
@@ -265,12 +263,10 @@ mod tests {
])) as ArrayRef;
assert_eq!(&arr, &expected_output);
-
- Ok(())
}
#[test]
- fn test_concat_boolean_primitive_arrays() -> Result<()> {
+ fn test_concat_boolean_primitive_arrays() {
let arr = concat(&[
&BooleanArray::from(vec![
Some(true),
@@ -281,7 +277,8 @@ mod tests {
Some(false),
]),
&BooleanArray::from(vec![None, Some(false), Some(true), Some(false)]),
- ])?;
+ ])
+ .unwrap();
let expected_output = Arc::new(BooleanArray::from(vec![
Some(true),
@@ -297,12 +294,10 @@ mod tests {
])) as ArrayRef;
assert_eq!(&arr, &expected_output);
-
- Ok(())
}
#[test]
- fn test_concat_primitive_list_arrays() -> Result<()> {
+ fn test_concat_primitive_list_arrays() {
let list1 = vec![
Some(vec![Some(-1), Some(-1), Some(2), None, None]),
Some(vec![]),
@@ -324,7 +319,7 @@ mod tests {
let list3_array =
ListArray::from_iter_primitive::<Int64Type, _, _>(list3.clone());
- let array_result = concat(&[&list1_array, &list2_array, &list3_array])?;
+ let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();
let expected = list1
.into_iter()
@@ -333,12 +328,10 @@ mod tests {
let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);
assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
-
- Ok(())
}
#[test]
- fn test_concat_struct_arrays() -> Result<()> {
+ fn test_concat_struct_arrays() {
let field = Field::new("field", DataType::Int64, true);
let input_primitive_1: ArrayRef =
Arc::new(PrimitiveArray::<Int64Type>::from(vec![
@@ -367,7 +360,7 @@ mod tests {
]));
let input_struct_3 = StructArray::from(vec![(field, input_primitive_3)]);
- let arr = concat(&[&input_struct_1, &input_struct_2, &input_struct_3])?;
+ let arr = concat(&[&input_struct_1, &input_struct_2, &input_struct_3]).unwrap();
let expected_primitive_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
@@ -390,12 +383,10 @@ mod tests {
.unwrap()
.column(0);
assert_eq!(actual_primitive, &expected_primitive_output);
-
- Ok(())
}
#[test]
- fn test_concat_struct_array_slices() -> Result<()> {
+ fn test_concat_struct_array_slices() {
let field = Field::new("field", DataType::Int64, true);
let input_primitive_1: ArrayRef =
Arc::new(PrimitiveArray::<Int64Type>::from(vec![
@@ -419,7 +410,8 @@ mod tests {
let arr = concat(&[
input_struct_1.slice(1, 3).as_ref(),
input_struct_2.slice(1, 2).as_ref(),
- ])?;
+ ])
+ .unwrap();
let expected_primitive_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
@@ -435,39 +427,35 @@ mod tests {
.unwrap()
.column(0);
assert_eq!(actual_primitive, &expected_primitive_output);
-
- Ok(())
}
#[test]
- fn test_string_array_slices() -> Result<()> {
+ fn test_string_array_slices() {
let input_1 = StringArray::from(vec!["hello", "A", "B", "C"]);
let input_2 = StringArray::from(vec!["world", "D", "E", "Z"]);
- let arr = concat(&[input_1.slice(1, 3).as_ref(), input_2.slice(1, 2).as_ref()])?;
+ let arr = concat(&[input_1.slice(1, 3).as_ref(), input_2.slice(1, 2).as_ref()])
+ .unwrap();
let expected_output = StringArray::from(vec!["A", "B", "C", "D", "E"]);
let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
assert_eq!(actual_output, &expected_output);
-
- Ok(())
}
#[test]
- fn test_string_array_with_null_slices() -> Result<()> {
+ fn test_string_array_with_null_slices() {
let input_1 = StringArray::from(vec![Some("hello"), None, Some("A"), Some("C")]);
let input_2 = StringArray::from(vec![None, Some("world"), Some("D"), None]);
- let arr = concat(&[input_1.slice(1, 3).as_ref(), input_2.slice(1, 2).as_ref()])?;
+ let arr = concat(&[input_1.slice(1, 3).as_ref(), input_2.slice(1, 2).as_ref()])
+ .unwrap();
let expected_output =
StringArray::from(vec![None, Some("A"), Some("C"), Some("world"), Some("D")]);
let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
assert_eq!(actual_output, &expected_output);
-
- Ok(())
}
fn collect_string_dictionary(
@@ -539,7 +527,7 @@ mod tests {
}
#[test]
- fn test_concat_string_sizes() -> Result<()> {
+ fn test_concat_string_sizes() {
let a: LargeStringArray = ((0..150).map(|_| Some("foo"))).collect();
let b: LargeStringArray = ((0..150).map(|_| Some("foo"))).collect();
let c = LargeStringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]);
@@ -550,11 +538,9 @@ mod tests {
// 909
// closest 64 byte aligned cap = 960
- let arr = concat(&[&a, &b, &c])?;
+ let arr = concat(&[&a, &b, &c]).unwrap();
// this would have been 1280 if we did not precompute the value lengths.
assert_eq!(arr.data().buffers()[1].capacity(), 960);
-
- Ok(())
}
#[test]
diff --git a/arrow-select/src/lib.rs b/arrow-select/src/lib.rs
index 159c9b0ff..5249b5c4c 100644
--- a/arrow-select/src/lib.rs
+++ b/arrow-select/src/lib.rs
@@ -17,6 +17,7 @@
//! Arrow selection kernels
+pub mod concat;
pub mod filter;
pub mod interleave;
pub mod take;
diff --git a/arrow/src/compute/kernels/mod.rs b/arrow/src/compute/kernels/mod.rs
index 68ae2439f..a772f5bcc 100644
--- a/arrow/src/compute/kernels/mod.rs
+++ b/arrow/src/compute/kernels/mod.rs
@@ -25,7 +25,6 @@ pub mod boolean;
pub mod cast;
pub mod cast_utils;
pub mod comparison;
-pub mod concat;
pub mod concat_elements;
pub mod length;
pub mod limit;
@@ -37,4 +36,4 @@ pub mod temporal;
pub mod window;
pub mod zip;
-pub use arrow_select::{filter, interleave, take};
+pub use arrow_select::{concat, filter, interleave, take};