You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/02/15 20:24:20 UTC

[arrow-rs] branch master updated: Use new DecimalArray creation API in arrow crate (#1249)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 747e72a  Use new DecimalArray creation API in arrow crate (#1249)
747e72a is described below

commit 747e72a0c3bf5771c7bfde7b09c5166c5aa51bc3
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Tue Feb 15 15:24:12 2022 -0500

    Use new DecimalArray creation API in arrow crate (#1249)
    
    * Use new API in ffi.rs
    
    * Use new API in sort.rs
    
    * Use new API in pretty.rs
    
    * Use new API in array_binary.rs
    
    * Use new API in equal_json.rs
    
    * Use new API in take.rs
    
    * Use new API in cast.rs
    
    * Use new API in equal.rs
    
    * clippy
---
 arrow/src/array/array_binary.rs   | 31 +++++++------
 arrow/src/array/equal/mod.rs      | 21 ++++-----
 arrow/src/array/equal_json.rs     | 19 ++++----
 arrow/src/array/transform/mod.rs  | 19 +++-----
 arrow/src/compute/kernels/cast.rs | 96 +++++++++++++++++----------------------
 arrow/src/compute/kernels/sort.rs | 14 ++----
 arrow/src/compute/kernels/take.rs | 82 +++++++++++++++------------------
 arrow/src/ffi.rs                  | 14 +++---
 arrow/src/util/pretty.rs          | 28 ++++++------
 9 files changed, 145 insertions(+), 179 deletions(-)

diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs
index 7496e24..7956b0e 100644
--- a/arrow/src/array/array_binary.rs
+++ b/arrow/src/array/array_binary.rs
@@ -702,7 +702,7 @@ impl Array for FixedSizeBinaryArray {
 /// # Examples
 ///
 /// ```
-///    use arrow::array::{Array, DecimalArray, DecimalBuilder};
+///    use arrow::array::{Array, DecimalArray};
 ///    use arrow::datatypes::DataType;
 ///
 ///    // Create a DecimalArray with the default precision and scale
@@ -943,6 +943,12 @@ impl From<ArrayData> for DecimalArray {
     }
 }
 
+impl From<DecimalArray> for ArrayData {
+    fn from(array: DecimalArray) -> Self {
+        array.data
+    }
+}
+
 /// an iterator that returns Some(i128) or None, that can be used on a
 /// DecimalArray
 #[derive(Debug)]
@@ -1573,11 +1579,12 @@ mod tests {
 
     #[test]
     fn test_decimal_array_value_as_string() {
-        let mut decimal_builder = DecimalBuilder::new(7, 6, 3);
-        for value in [123450, -123450, 100, -100, 10, -10, 0] {
-            decimal_builder.append_value(value).unwrap();
-        }
-        let arr = decimal_builder.finish();
+        let arr = [123450, -123450, 100, -100, 10, -10, 0]
+            .into_iter()
+            .map(Some)
+            .collect::<DecimalArray>()
+            .with_precision_and_scale(6, 3)
+            .unwrap();
 
         assert_eq!("123.450", arr.value_as_string(0));
         assert_eq!("-123.450", arr.value_as_string(1));
@@ -1641,14 +1648,12 @@ mod tests {
 
     #[test]
     fn test_decimal_array_fmt_debug() {
-        let values: Vec<i128> = vec![8887000000, -8887000000];
-        let mut decimal_builder = DecimalBuilder::new(3, 23, 6);
+        let arr = [Some(8887000000), Some(-8887000000), None]
+            .iter()
+            .collect::<DecimalArray>()
+            .with_precision_and_scale(23, 6)
+            .unwrap();
 
-        values.iter().for_each(|&value| {
-            decimal_builder.append_value(value).unwrap();
-        });
-        decimal_builder.append_null().unwrap();
-        let arr = decimal_builder.finish();
         assert_eq!(
             "DecimalArray<23, 6>\n[\n  8887.000000,\n  -8887.000000,\n  null,\n]",
             format!("{:?}", arr)
diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs
index 18d0ffe..7bd0b70 100644
--- a/arrow/src/array/equal/mod.rs
+++ b/arrow/src/array/equal/mod.rs
@@ -311,9 +311,9 @@ mod tests {
 
     use crate::array::{
         array::Array, ArrayData, ArrayDataBuilder, ArrayRef, BinaryOffsetSizeTrait,
-        BooleanArray, DecimalBuilder, FixedSizeBinaryBuilder, FixedSizeListBuilder,
-        GenericBinaryArray, Int32Builder, ListBuilder, NullArray, PrimitiveBuilder,
-        StringArray, StringDictionaryBuilder, StringOffsetSizeTrait, StructArray,
+        BooleanArray, FixedSizeBinaryBuilder, FixedSizeListBuilder, GenericBinaryArray,
+        Int32Builder, ListBuilder, NullArray, PrimitiveBuilder, StringArray,
+        StringDictionaryBuilder, StringOffsetSizeTrait, StructArray,
     };
     use crate::array::{GenericStringArray, Int32Array};
     use crate::buffer::Buffer;
@@ -818,16 +818,11 @@ mod tests {
     }
 
     fn create_decimal_array(data: &[Option<i128>]) -> ArrayData {
-        let mut builder = DecimalBuilder::new(20, 23, 6);
-
-        for d in data {
-            if let Some(v) = d {
-                builder.append_value(*v).unwrap();
-            } else {
-                builder.append_null().unwrap();
-            }
-        }
-        builder.finish().data().clone()
+        data.iter()
+            .collect::<DecimalArray>()
+            .with_precision_and_scale(23, 6)
+            .unwrap()
+            .into()
     }
 
     #[test]
diff --git a/arrow/src/array/equal_json.rs b/arrow/src/array/equal_json.rs
index 0350fc9..85ace90 100644
--- a/arrow/src/array/equal_json.rs
+++ b/arrow/src/array/equal_json.rs
@@ -937,11 +937,11 @@ mod tests {
     #[test]
     fn test_decimal_json_equal() {
         // Test the equal case
-        let mut builder = DecimalBuilder::new(30, 23, 6);
-        builder.append_value(1_000).unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(-250).unwrap();
-        let arrow_array: DecimalArray = builder.finish();
+        let arrow_array = [Some(1_000), None, Some(-250)]
+            .iter()
+            .collect::<DecimalArray>()
+            .with_precision_and_scale(23, 6)
+            .unwrap();
         let json_array: Value = serde_json::from_str(
             r#"
             [
@@ -956,10 +956,11 @@ mod tests {
         assert!(json_array.eq(&arrow_array));
 
         // Test unequal case
-        builder.append_value(1_000).unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(55).unwrap();
-        let arrow_array: DecimalArray = builder.finish();
+        let arrow_array = [Some(1_000), None, Some(55)]
+            .iter()
+            .collect::<DecimalArray>()
+            .with_precision_and_scale(23, 6)
+            .unwrap();
         let json_array: Value = serde_json::from_str(
             r#"
             [
diff --git a/arrow/src/array/transform/mod.rs b/arrow/src/array/transform/mod.rs
index 2f69ea8..77e18c7 100644
--- a/arrow/src/array/transform/mod.rs
+++ b/arrow/src/array/transform/mod.rs
@@ -670,7 +670,7 @@ mod tests {
 
     use super::*;
 
-    use crate::array::{DecimalArray, DecimalBuilder};
+    use crate::array::DecimalArray;
     use crate::{
         array::{
             Array, ArrayData, ArrayRef, BooleanArray, DictionaryArray,
@@ -691,18 +691,11 @@ mod tests {
         precision: usize,
         scale: usize,
     ) -> DecimalArray {
-        let mut decimal_builder = DecimalBuilder::new(array.len(), precision, scale);
-        for value in array {
-            match value {
-                None => {
-                    decimal_builder.append_null().unwrap();
-                }
-                Some(v) => {
-                    decimal_builder.append_value(*v).unwrap();
-                }
-            }
-        }
-        decimal_builder.finish()
+        array
+            .iter()
+            .collect::<DecimalArray>()
+            .with_precision_and_scale(precision, scale)
+            .unwrap()
     }
 
     #[test]
diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs
index 9089e78..827a182 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -260,39 +260,41 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
 // cast the integer array to defined decimal data type array
 macro_rules! cast_integer_to_decimal {
     ($ARRAY: expr, $ARRAY_TYPE: ident, $PRECISION : ident, $SCALE : ident) => {{
-        let mut decimal_builder = DecimalBuilder::new($ARRAY.len(), *$PRECISION, *$SCALE);
         let array = $ARRAY.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
         let mul: i128 = 10_i128.pow(*$SCALE as u32);
-        for i in 0..array.len() {
-            if array.is_null(i) {
-                decimal_builder.append_null()?;
-            } else {
-                // convert i128 first
-                let v = array.value(i) as i128;
-                // if the input value is overflow, it will throw an error.
-                decimal_builder.append_value(mul * v)?;
-            }
-        }
-        Ok(Arc::new(decimal_builder.finish()))
+        let decimal_array = array
+            .iter()
+            .map(|v| {
+                v.map(|v| {
+                    let v = v as i128;
+                    // with_precision_and_scale validates the
+                    // value is within range for the output precision
+                    mul * v
+                })
+            })
+            .collect::<DecimalArray>()
+            .with_precision_and_scale(*$PRECISION, *$SCALE)?;
+        Ok(Arc::new(decimal_array))
     }};
 }
 
 // cast the floating-point array to defined decimal data type array
 macro_rules! cast_floating_point_to_decimal {
     ($ARRAY: expr, $ARRAY_TYPE: ident, $PRECISION : ident, $SCALE : ident) => {{
-        let mut decimal_builder = DecimalBuilder::new($ARRAY.len(), *$PRECISION, *$SCALE);
         let array = $ARRAY.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
         let mul = 10_f64.powi(*$SCALE as i32);
-        for i in 0..array.len() {
-            if array.is_null(i) {
-                decimal_builder.append_null()?;
-            } else {
-                let v = ((array.value(i) as f64) * mul) as i128;
-                // if the input value is overflow, it will throw an error.
-                decimal_builder.append_value(v)?;
-            }
-        }
-        Ok(Arc::new(decimal_builder.finish()))
+        let decimal_array = array
+            .iter()
+            .map(|v| {
+                v.map(|v| {
+                    // with_precision_and_scale validates the
+                    // value is within range for the output precision
+                    ((v as f64) * mul) as i128
+                })
+            })
+            .collect::<DecimalArray>()
+            .with_precision_and_scale(*$PRECISION, *$SCALE)?;
+        Ok(Arc::new(decimal_array))
     }};
 }
 
@@ -1166,33 +1168,28 @@ fn cast_decimal_to_decimal(
     output_precision: &usize,
     output_scale: &usize,
 ) -> Result<ArrayRef> {
-    let mut decimal_builder =
-        DecimalBuilder::new(array.len(), *output_precision, *output_scale);
     let array = array.as_any().downcast_ref::<DecimalArray>().unwrap();
-    if input_scale > output_scale {
+
+    let output_array = if input_scale > output_scale {
         // For example, input_scale is 4 and output_scale is 3;
         // Original value is 11234_i128, and will be cast to 1123_i128.
         let div = 10_i128.pow((input_scale - output_scale) as u32);
-        for i in 0..array.len() {
-            if array.is_null(i) {
-                decimal_builder.append_null()?;
-            } else {
-                decimal_builder.append_value(array.value(i) / div)?;
-            }
-        }
+        array
+            .iter()
+            .map(|v| v.map(|v| v / div))
+            .collect::<DecimalArray>()
     } else {
         // For example, input_scale is 3 and output_scale is 4;
         // Original value is 1123_i128, and will be cast to 11230_i128.
         let mul = 10_i128.pow((output_scale - input_scale) as u32);
-        for i in 0..array.len() {
-            if array.is_null(i) {
-                decimal_builder.append_null()?;
-            } else {
-                decimal_builder.append_value(array.value(i) * mul)?;
-            }
-        }
+        array
+            .iter()
+            .map(|v| v.map(|v| v * mul))
+            .collect::<DecimalArray>()
     }
-    Ok(Arc::new(decimal_builder.finish()))
+    .with_precision_and_scale(*output_precision, *output_scale)?;
+
+    Ok(Arc::new(output_array))
 }
 
 /// Cast an array by changing its array_data type to the desired type
@@ -2071,24 +2068,15 @@ mod tests {
         };
     }
 
-    // TODO remove this function if the decimal array has the creator function
     fn create_decimal_array(
         array: &[Option<i128>],
         precision: usize,
         scale: usize,
     ) -> Result<DecimalArray> {
-        let mut decimal_builder = DecimalBuilder::new(array.len(), precision, scale);
-        for value in array {
-            match value {
-                None => {
-                    decimal_builder.append_null()?;
-                }
-                Some(v) => {
-                    decimal_builder.append_value(*v)?;
-                }
-            }
-        }
-        Ok(decimal_builder.finish())
+        array
+            .iter()
+            .collect::<DecimalArray>()
+            .with_precision_and_scale(precision, scale)
     }
 
     #[test]
diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs
index 3553cd3..37ece56 100644
--- a/arrow/src/compute/kernels/sort.rs
+++ b/arrow/src/compute/kernels/sort.rs
@@ -1107,16 +1107,10 @@ mod tests {
     use std::sync::Arc;
 
     fn create_decimal_array(data: &[Option<i128>]) -> DecimalArray {
-        let mut builder = DecimalBuilder::new(20, 23, 6);
-
-        for d in data {
-            if let Some(v) = d {
-                builder.append_value(*v).unwrap();
-            } else {
-                builder.append_null().unwrap();
-            }
-        }
-        builder.finish()
+        data.iter()
+            .collect::<DecimalArray>()
+            .with_precision_and_scale(23, 6)
+            .unwrap()
     }
 
     fn test_sort_to_indices_decimal_array(
diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs
index a3b5283..562db0f 100644
--- a/arrow/src/compute/kernels/take.rs
+++ b/arrow/src/compute/kernels/take.rs
@@ -496,27 +496,30 @@ where
     IndexType: ArrowNumericType,
     IndexType::Native: ToPrimitive,
 {
-    // TODO optimize decimal take and construct decimal array from MutableBuffer
-    let mut builder = DecimalBuilder::new(
-        indices.len(),
-        decimal_values.precision(),
-        decimal_values.scale(),
-    );
-    for i in 0..indices.len() {
-        if indices.is_null(i) {
-            builder.append_null()?;
-        } else {
-            let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
-                ArrowError::ComputeError("Cast to usize failed".to_string())
-            })?;
-            if decimal_values.is_null(index) {
-                builder.append_null()?
-            } else {
-                builder.append_value(decimal_values.value(index))?
-            }
-        }
-    }
-    Ok(builder.finish())
+    indices
+        .iter()
+        .map(|index| {
+            // Use type annotations below for readability (was blowing
+            // my mind otherwise)
+            let t: Option<Result<Option<_>>> = index.map(|index| {
+                let index = ToPrimitive::to_usize(&index).ok_or_else(|| {
+                    ArrowError::ComputeError("Cast to usize failed".to_string())
+                })?;
+
+                if decimal_values.is_null(index) {
+                    Ok(None)
+                } else {
+                    Ok(Some(decimal_values.value(index)))
+                }
+            });
+            let t: Result<Option<Option<_>>> = t.transpose();
+            let t: Result<Option<_>> = t.map(|t| t.flatten());
+            t
+        })
+        .collect::<Result<DecimalArray>>()?
+        // PERF: we could avoid re-validating that the data in
+        // DecimalArray was in range as we know it came from a valid DecimalArray
+        .with_precision_and_scale(decimal_values.precision(), decimal_values.scale())
 }
 
 /// `take` implementation for all primitive arrays
@@ -965,30 +968,19 @@ mod tests {
         precision: &usize,
         scale: &usize,
     ) -> Result<()> {
-        let mut builder = DecimalBuilder::new(data.len(), *precision, *scale);
-        for value in data {
-            match value {
-                None => {
-                    builder.append_null()?;
-                }
-                Some(v) => {
-                    builder.append_value(v)?;
-                }
-            }
-        }
-        let output = builder.finish();
-        let mut builder = DecimalBuilder::new(expected_data.len(), *precision, *scale);
-        for value in expected_data {
-            match value {
-                None => {
-                    builder.append_null()?;
-                }
-                Some(v) => {
-                    builder.append_value(v)?;
-                }
-            }
-        }
-        let expected = Arc::new(builder.finish()) as ArrayRef;
+        let output = data
+            .into_iter()
+            .collect::<DecimalArray>()
+            .with_precision_and_scale(*precision, *scale)
+            .unwrap();
+
+        let expected = expected_data
+            .into_iter()
+            .collect::<DecimalArray>()
+            .with_precision_and_scale(*precision, *scale)
+            .unwrap();
+
+        let expected = Arc::new(expected) as ArrayRef;
         let output = take(&output, index, options).unwrap();
         assert_eq!(&output, &expected);
         Ok(())
diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs
index 2d9577c..ac8fc19 100644
--- a/arrow/src/ffi.rs
+++ b/arrow/src/ffi.rs
@@ -760,8 +760,8 @@ mod tests {
     use super::*;
     use crate::array::{
         make_array, Array, ArrayData, BinaryOffsetSizeTrait, BooleanArray, DecimalArray,
-        DecimalBuilder, GenericBinaryArray, GenericListArray, GenericStringArray,
-        Int32Array, OffsetSizeTrait, StringOffsetSizeTrait, Time32MillisecondArray,
+        GenericBinaryArray, GenericListArray, GenericStringArray, Int32Array,
+        OffsetSizeTrait, StringOffsetSizeTrait, Time32MillisecondArray,
         TimestampMillisecondArray,
     };
     use crate::compute::kernels;
@@ -794,11 +794,11 @@ mod tests {
     #[test]
     fn test_decimal_round_trip() -> Result<()> {
         // create an array natively
-        let mut builder = DecimalBuilder::new(5, 6, 2);
-        builder.append_value(12345_i128).unwrap();
-        builder.append_value(-12345_i128).unwrap();
-        builder.append_null().unwrap();
-        let original_array = builder.finish();
+        let original_array = [Some(12345_i128), Some(-12345_i128), None]
+            .into_iter()
+            .collect::<DecimalArray>()
+            .with_precision_and_scale(6, 2)
+            .unwrap();
 
         // export it
         let array = ArrowArray::try_from(original_array.data().clone())?;
diff --git a/arrow/src/util/pretty.rs b/arrow/src/util/pretty.rs
index 4b67f3d..f7e05bc 100644
--- a/arrow/src/util/pretty.rs
+++ b/arrow/src/util/pretty.rs
@@ -119,7 +119,7 @@ mod tests {
     };
 
     use super::*;
-    use crate::array::{DecimalBuilder, FixedSizeListBuilder, Int32Array};
+    use crate::array::{DecimalArray, FixedSizeListBuilder, Int32Array};
     use std::fmt::Write;
     use std::sync::Arc;
 
@@ -517,17 +517,16 @@ mod tests {
 
     #[test]
     fn test_decimal_display() -> Result<()> {
-        let capacity = 10;
         let precision = 10;
         let scale = 2;
 
-        let mut builder = DecimalBuilder::new(capacity, precision, scale);
-        builder.append_value(101).unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(200).unwrap();
-        builder.append_value(3040).unwrap();
+        let array = [Some(101), None, Some(200), Some(3040)]
+            .into_iter()
+            .collect::<DecimalArray>()
+            .with_precision_and_scale(precision, scale)
+            .unwrap();
 
-        let dm = Arc::new(builder.finish()) as ArrayRef;
+        let dm = Arc::new(array) as ArrayRef;
 
         let schema = Arc::new(Schema::new(vec![Field::new(
             "f",
@@ -558,17 +557,16 @@ mod tests {
 
     #[test]
     fn test_decimal_display_zero_scale() -> Result<()> {
-        let capacity = 10;
         let precision = 5;
         let scale = 0;
 
-        let mut builder = DecimalBuilder::new(capacity, precision, scale);
-        builder.append_value(101).unwrap();
-        builder.append_null().unwrap();
-        builder.append_value(200).unwrap();
-        builder.append_value(3040).unwrap();
+        let array = [Some(101), None, Some(200), Some(3040)]
+            .into_iter()
+            .collect::<DecimalArray>()
+            .with_precision_and_scale(precision, scale)
+            .unwrap();
 
-        let dm = Arc::new(builder.finish()) as ArrayRef;
+        let dm = Arc::new(array) as ArrayRef;
 
         let schema = Arc::new(Schema::new(vec![Field::new(
             "f",