You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/02/15 20:24:20 UTC
[arrow-rs] branch master updated: Use new DecimalArray creation API in arrow crate (#1249)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 747e72a Use new DecimalArray creation API in arrow crate (#1249)
747e72a is described below
commit 747e72a0c3bf5771c7bfde7b09c5166c5aa51bc3
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Tue Feb 15 15:24:12 2022 -0500
Use new DecimalArray creation API in arrow crate (#1249)
* Use new API in ffi.rs
* Use new API in sort.rs
* Use new API in pretty.rs
* Use new API in array_binary.rs
* Use new API in equal_json.rs
* Use new API in take.rs
* Use new API in cast.rs
* Use new API in equal.rs
* clippy
---
arrow/src/array/array_binary.rs | 31 +++++++------
arrow/src/array/equal/mod.rs | 21 ++++-----
arrow/src/array/equal_json.rs | 19 ++++----
arrow/src/array/transform/mod.rs | 19 +++-----
arrow/src/compute/kernels/cast.rs | 96 +++++++++++++++++----------------------
arrow/src/compute/kernels/sort.rs | 14 ++----
arrow/src/compute/kernels/take.rs | 82 +++++++++++++++------------------
arrow/src/ffi.rs | 14 +++---
arrow/src/util/pretty.rs | 28 ++++++------
9 files changed, 145 insertions(+), 179 deletions(-)
diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs
index 7496e24..7956b0e 100644
--- a/arrow/src/array/array_binary.rs
+++ b/arrow/src/array/array_binary.rs
@@ -702,7 +702,7 @@ impl Array for FixedSizeBinaryArray {
/// # Examples
///
/// ```
-/// use arrow::array::{Array, DecimalArray, DecimalBuilder};
+/// use arrow::array::{Array, DecimalArray};
/// use arrow::datatypes::DataType;
///
/// // Create a DecimalArray with the default precision and scale
@@ -943,6 +943,12 @@ impl From<ArrayData> for DecimalArray {
}
}
+impl From<DecimalArray> for ArrayData {
+ fn from(array: DecimalArray) -> Self {
+ array.data
+ }
+}
+
/// an iterator that returns Some(i128) or None, that can be used on a
/// DecimalArray
#[derive(Debug)]
@@ -1573,11 +1579,12 @@ mod tests {
#[test]
fn test_decimal_array_value_as_string() {
- let mut decimal_builder = DecimalBuilder::new(7, 6, 3);
- for value in [123450, -123450, 100, -100, 10, -10, 0] {
- decimal_builder.append_value(value).unwrap();
- }
- let arr = decimal_builder.finish();
+ let arr = [123450, -123450, 100, -100, 10, -10, 0]
+ .into_iter()
+ .map(Some)
+ .collect::<DecimalArray>()
+ .with_precision_and_scale(6, 3)
+ .unwrap();
assert_eq!("123.450", arr.value_as_string(0));
assert_eq!("-123.450", arr.value_as_string(1));
@@ -1641,14 +1648,12 @@ mod tests {
#[test]
fn test_decimal_array_fmt_debug() {
- let values: Vec<i128> = vec![8887000000, -8887000000];
- let mut decimal_builder = DecimalBuilder::new(3, 23, 6);
+ let arr = [Some(8887000000), Some(-8887000000), None]
+ .iter()
+ .collect::<DecimalArray>()
+ .with_precision_and_scale(23, 6)
+ .unwrap();
- values.iter().for_each(|&value| {
- decimal_builder.append_value(value).unwrap();
- });
- decimal_builder.append_null().unwrap();
- let arr = decimal_builder.finish();
assert_eq!(
"DecimalArray<23, 6>\n[\n 8887.000000,\n -8887.000000,\n null,\n]",
format!("{:?}", arr)
diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs
index 18d0ffe..7bd0b70 100644
--- a/arrow/src/array/equal/mod.rs
+++ b/arrow/src/array/equal/mod.rs
@@ -311,9 +311,9 @@ mod tests {
use crate::array::{
array::Array, ArrayData, ArrayDataBuilder, ArrayRef, BinaryOffsetSizeTrait,
- BooleanArray, DecimalBuilder, FixedSizeBinaryBuilder, FixedSizeListBuilder,
- GenericBinaryArray, Int32Builder, ListBuilder, NullArray, PrimitiveBuilder,
- StringArray, StringDictionaryBuilder, StringOffsetSizeTrait, StructArray,
+ BooleanArray, FixedSizeBinaryBuilder, FixedSizeListBuilder, GenericBinaryArray,
+ Int32Builder, ListBuilder, NullArray, PrimitiveBuilder, StringArray,
+ StringDictionaryBuilder, StringOffsetSizeTrait, StructArray,
};
use crate::array::{GenericStringArray, Int32Array};
use crate::buffer::Buffer;
@@ -818,16 +818,11 @@ mod tests {
}
fn create_decimal_array(data: &[Option<i128>]) -> ArrayData {
- let mut builder = DecimalBuilder::new(20, 23, 6);
-
- for d in data {
- if let Some(v) = d {
- builder.append_value(*v).unwrap();
- } else {
- builder.append_null().unwrap();
- }
- }
- builder.finish().data().clone()
+ data.iter()
+ .collect::<DecimalArray>()
+ .with_precision_and_scale(23, 6)
+ .unwrap()
+ .into()
}
#[test]
diff --git a/arrow/src/array/equal_json.rs b/arrow/src/array/equal_json.rs
index 0350fc9..85ace90 100644
--- a/arrow/src/array/equal_json.rs
+++ b/arrow/src/array/equal_json.rs
@@ -937,11 +937,11 @@ mod tests {
#[test]
fn test_decimal_json_equal() {
// Test the equal case
- let mut builder = DecimalBuilder::new(30, 23, 6);
- builder.append_value(1_000).unwrap();
- builder.append_null().unwrap();
- builder.append_value(-250).unwrap();
- let arrow_array: DecimalArray = builder.finish();
+ let arrow_array = [Some(1_000), None, Some(-250)]
+ .iter()
+ .collect::<DecimalArray>()
+ .with_precision_and_scale(23, 6)
+ .unwrap();
let json_array: Value = serde_json::from_str(
r#"
[
@@ -956,10 +956,11 @@ mod tests {
assert!(json_array.eq(&arrow_array));
// Test unequal case
- builder.append_value(1_000).unwrap();
- builder.append_null().unwrap();
- builder.append_value(55).unwrap();
- let arrow_array: DecimalArray = builder.finish();
+ let arrow_array = [Some(1_000), None, Some(55)]
+ .iter()
+ .collect::<DecimalArray>()
+ .with_precision_and_scale(23, 6)
+ .unwrap();
let json_array: Value = serde_json::from_str(
r#"
[
diff --git a/arrow/src/array/transform/mod.rs b/arrow/src/array/transform/mod.rs
index 2f69ea8..77e18c7 100644
--- a/arrow/src/array/transform/mod.rs
+++ b/arrow/src/array/transform/mod.rs
@@ -670,7 +670,7 @@ mod tests {
use super::*;
- use crate::array::{DecimalArray, DecimalBuilder};
+ use crate::array::DecimalArray;
use crate::{
array::{
Array, ArrayData, ArrayRef, BooleanArray, DictionaryArray,
@@ -691,18 +691,11 @@ mod tests {
precision: usize,
scale: usize,
) -> DecimalArray {
- let mut decimal_builder = DecimalBuilder::new(array.len(), precision, scale);
- for value in array {
- match value {
- None => {
- decimal_builder.append_null().unwrap();
- }
- Some(v) => {
- decimal_builder.append_value(*v).unwrap();
- }
- }
- }
- decimal_builder.finish()
+ array
+ .iter()
+ .collect::<DecimalArray>()
+ .with_precision_and_scale(precision, scale)
+ .unwrap()
}
#[test]
diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs
index 9089e78..827a182 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -260,39 +260,41 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
// cast the integer array to defined decimal data type array
macro_rules! cast_integer_to_decimal {
($ARRAY: expr, $ARRAY_TYPE: ident, $PRECISION : ident, $SCALE : ident) => {{
- let mut decimal_builder = DecimalBuilder::new($ARRAY.len(), *$PRECISION, *$SCALE);
let array = $ARRAY.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
let mul: i128 = 10_i128.pow(*$SCALE as u32);
- for i in 0..array.len() {
- if array.is_null(i) {
- decimal_builder.append_null()?;
- } else {
- // convert i128 first
- let v = array.value(i) as i128;
- // if the input value is overflow, it will throw an error.
- decimal_builder.append_value(mul * v)?;
- }
- }
- Ok(Arc::new(decimal_builder.finish()))
+ let decimal_array = array
+ .iter()
+ .map(|v| {
+ v.map(|v| {
+ let v = v as i128;
+ // with_precision_and_scale validates the
+ // value is within range for the output precision
+ mul * v
+ })
+ })
+ .collect::<DecimalArray>()
+ .with_precision_and_scale(*$PRECISION, *$SCALE)?;
+ Ok(Arc::new(decimal_array))
}};
}
// cast the floating-point array to defined decimal data type array
macro_rules! cast_floating_point_to_decimal {
($ARRAY: expr, $ARRAY_TYPE: ident, $PRECISION : ident, $SCALE : ident) => {{
- let mut decimal_builder = DecimalBuilder::new($ARRAY.len(), *$PRECISION, *$SCALE);
let array = $ARRAY.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
let mul = 10_f64.powi(*$SCALE as i32);
- for i in 0..array.len() {
- if array.is_null(i) {
- decimal_builder.append_null()?;
- } else {
- let v = ((array.value(i) as f64) * mul) as i128;
- // if the input value is overflow, it will throw an error.
- decimal_builder.append_value(v)?;
- }
- }
- Ok(Arc::new(decimal_builder.finish()))
+ let decimal_array = array
+ .iter()
+ .map(|v| {
+ v.map(|v| {
+ // with_precision_and_scale validates the
+ // value is within range for the output precision
+ ((v as f64) * mul) as i128
+ })
+ })
+ .collect::<DecimalArray>()
+ .with_precision_and_scale(*$PRECISION, *$SCALE)?;
+ Ok(Arc::new(decimal_array))
}};
}
@@ -1166,33 +1168,28 @@ fn cast_decimal_to_decimal(
output_precision: &usize,
output_scale: &usize,
) -> Result<ArrayRef> {
- let mut decimal_builder =
- DecimalBuilder::new(array.len(), *output_precision, *output_scale);
let array = array.as_any().downcast_ref::<DecimalArray>().unwrap();
- if input_scale > output_scale {
+
+ let output_array = if input_scale > output_scale {
// For example, input_scale is 4 and output_scale is 3;
// Original value is 11234_i128, and will be cast to 1123_i128.
let div = 10_i128.pow((input_scale - output_scale) as u32);
- for i in 0..array.len() {
- if array.is_null(i) {
- decimal_builder.append_null()?;
- } else {
- decimal_builder.append_value(array.value(i) / div)?;
- }
- }
+ array
+ .iter()
+ .map(|v| v.map(|v| v / div))
+ .collect::<DecimalArray>()
} else {
// For example, input_scale is 3 and output_scale is 4;
// Original value is 1123_i128, and will be cast to 11230_i128.
let mul = 10_i128.pow((output_scale - input_scale) as u32);
- for i in 0..array.len() {
- if array.is_null(i) {
- decimal_builder.append_null()?;
- } else {
- decimal_builder.append_value(array.value(i) * mul)?;
- }
- }
+ array
+ .iter()
+ .map(|v| v.map(|v| v * mul))
+ .collect::<DecimalArray>()
}
- Ok(Arc::new(decimal_builder.finish()))
+ .with_precision_and_scale(*output_precision, *output_scale)?;
+
+ Ok(Arc::new(output_array))
}
/// Cast an array by changing its array_data type to the desired type
@@ -2071,24 +2068,15 @@ mod tests {
};
}
- // TODO remove this function if the decimal array has the creator function
fn create_decimal_array(
array: &[Option<i128>],
precision: usize,
scale: usize,
) -> Result<DecimalArray> {
- let mut decimal_builder = DecimalBuilder::new(array.len(), precision, scale);
- for value in array {
- match value {
- None => {
- decimal_builder.append_null()?;
- }
- Some(v) => {
- decimal_builder.append_value(*v)?;
- }
- }
- }
- Ok(decimal_builder.finish())
+ array
+ .iter()
+ .collect::<DecimalArray>()
+ .with_precision_and_scale(precision, scale)
}
#[test]
diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs
index 3553cd3..37ece56 100644
--- a/arrow/src/compute/kernels/sort.rs
+++ b/arrow/src/compute/kernels/sort.rs
@@ -1107,16 +1107,10 @@ mod tests {
use std::sync::Arc;
fn create_decimal_array(data: &[Option<i128>]) -> DecimalArray {
- let mut builder = DecimalBuilder::new(20, 23, 6);
-
- for d in data {
- if let Some(v) = d {
- builder.append_value(*v).unwrap();
- } else {
- builder.append_null().unwrap();
- }
- }
- builder.finish()
+ data.iter()
+ .collect::<DecimalArray>()
+ .with_precision_and_scale(23, 6)
+ .unwrap()
}
fn test_sort_to_indices_decimal_array(
diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs
index a3b5283..562db0f 100644
--- a/arrow/src/compute/kernels/take.rs
+++ b/arrow/src/compute/kernels/take.rs
@@ -496,27 +496,30 @@ where
IndexType: ArrowNumericType,
IndexType::Native: ToPrimitive,
{
- // TODO optimize decimal take and construct decimal array from MutableBuffer
- let mut builder = DecimalBuilder::new(
- indices.len(),
- decimal_values.precision(),
- decimal_values.scale(),
- );
- for i in 0..indices.len() {
- if indices.is_null(i) {
- builder.append_null()?;
- } else {
- let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
- ArrowError::ComputeError("Cast to usize failed".to_string())
- })?;
- if decimal_values.is_null(index) {
- builder.append_null()?
- } else {
- builder.append_value(decimal_values.value(index))?
- }
- }
- }
- Ok(builder.finish())
+ indices
+ .iter()
+ .map(|index| {
+ // Use type annotations below for readability (was blowing
+ // my mind otherwise)
+ let t: Option<Result<Option<_>>> = index.map(|index| {
+ let index = ToPrimitive::to_usize(&index).ok_or_else(|| {
+ ArrowError::ComputeError("Cast to usize failed".to_string())
+ })?;
+
+ if decimal_values.is_null(index) {
+ Ok(None)
+ } else {
+ Ok(Some(decimal_values.value(index)))
+ }
+ });
+ let t: Result<Option<Option<_>>> = t.transpose();
+ let t: Result<Option<_>> = t.map(|t| t.flatten());
+ t
+ })
+ .collect::<Result<DecimalArray>>()?
+ // PERF: we could avoid re-validating that the data in
+ // DecimalArray was in range as we know it came from a valid DecimalArray
+ .with_precision_and_scale(decimal_values.precision(), decimal_values.scale())
}
/// `take` implementation for all primitive arrays
@@ -965,30 +968,19 @@ mod tests {
precision: &usize,
scale: &usize,
) -> Result<()> {
- let mut builder = DecimalBuilder::new(data.len(), *precision, *scale);
- for value in data {
- match value {
- None => {
- builder.append_null()?;
- }
- Some(v) => {
- builder.append_value(v)?;
- }
- }
- }
- let output = builder.finish();
- let mut builder = DecimalBuilder::new(expected_data.len(), *precision, *scale);
- for value in expected_data {
- match value {
- None => {
- builder.append_null()?;
- }
- Some(v) => {
- builder.append_value(v)?;
- }
- }
- }
- let expected = Arc::new(builder.finish()) as ArrayRef;
+ let output = data
+ .into_iter()
+ .collect::<DecimalArray>()
+ .with_precision_and_scale(*precision, *scale)
+ .unwrap();
+
+ let expected = expected_data
+ .into_iter()
+ .collect::<DecimalArray>()
+ .with_precision_and_scale(*precision, *scale)
+ .unwrap();
+
+ let expected = Arc::new(expected) as ArrayRef;
let output = take(&output, index, options).unwrap();
assert_eq!(&output, &expected);
Ok(())
diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs
index 2d9577c..ac8fc19 100644
--- a/arrow/src/ffi.rs
+++ b/arrow/src/ffi.rs
@@ -760,8 +760,8 @@ mod tests {
use super::*;
use crate::array::{
make_array, Array, ArrayData, BinaryOffsetSizeTrait, BooleanArray, DecimalArray,
- DecimalBuilder, GenericBinaryArray, GenericListArray, GenericStringArray,
- Int32Array, OffsetSizeTrait, StringOffsetSizeTrait, Time32MillisecondArray,
+ GenericBinaryArray, GenericListArray, GenericStringArray, Int32Array,
+ OffsetSizeTrait, StringOffsetSizeTrait, Time32MillisecondArray,
TimestampMillisecondArray,
};
use crate::compute::kernels;
@@ -794,11 +794,11 @@ mod tests {
#[test]
fn test_decimal_round_trip() -> Result<()> {
// create an array natively
- let mut builder = DecimalBuilder::new(5, 6, 2);
- builder.append_value(12345_i128).unwrap();
- builder.append_value(-12345_i128).unwrap();
- builder.append_null().unwrap();
- let original_array = builder.finish();
+ let original_array = [Some(12345_i128), Some(-12345_i128), None]
+ .into_iter()
+ .collect::<DecimalArray>()
+ .with_precision_and_scale(6, 2)
+ .unwrap();
// export it
let array = ArrowArray::try_from(original_array.data().clone())?;
diff --git a/arrow/src/util/pretty.rs b/arrow/src/util/pretty.rs
index 4b67f3d..f7e05bc 100644
--- a/arrow/src/util/pretty.rs
+++ b/arrow/src/util/pretty.rs
@@ -119,7 +119,7 @@ mod tests {
};
use super::*;
- use crate::array::{DecimalBuilder, FixedSizeListBuilder, Int32Array};
+ use crate::array::{DecimalArray, FixedSizeListBuilder, Int32Array};
use std::fmt::Write;
use std::sync::Arc;
@@ -517,17 +517,16 @@ mod tests {
#[test]
fn test_decimal_display() -> Result<()> {
- let capacity = 10;
let precision = 10;
let scale = 2;
- let mut builder = DecimalBuilder::new(capacity, precision, scale);
- builder.append_value(101).unwrap();
- builder.append_null().unwrap();
- builder.append_value(200).unwrap();
- builder.append_value(3040).unwrap();
+ let array = [Some(101), None, Some(200), Some(3040)]
+ .into_iter()
+ .collect::<DecimalArray>()
+ .with_precision_and_scale(precision, scale)
+ .unwrap();
- let dm = Arc::new(builder.finish()) as ArrayRef;
+ let dm = Arc::new(array) as ArrayRef;
let schema = Arc::new(Schema::new(vec![Field::new(
"f",
@@ -558,17 +557,16 @@ mod tests {
#[test]
fn test_decimal_display_zero_scale() -> Result<()> {
- let capacity = 10;
let precision = 5;
let scale = 0;
- let mut builder = DecimalBuilder::new(capacity, precision, scale);
- builder.append_value(101).unwrap();
- builder.append_null().unwrap();
- builder.append_value(200).unwrap();
- builder.append_value(3040).unwrap();
+ let array = [Some(101), None, Some(200), Some(3040)]
+ .into_iter()
+ .collect::<DecimalArray>()
+ .with_precision_and_scale(precision, scale)
+ .unwrap();
- let dm = Arc::new(builder.finish()) as ArrayRef;
+ let dm = Arc::new(array) as ArrayRef;
let schema = Arc::new(Schema::new(vec![Field::new(
"f",