You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/08/20 15:22:10 UTC

[arrow-rs] branch master updated: Refactoring DecimalBuilder constructors (#2517)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 65cae43dc Refactoring DecimalBuilder constructors (#2517)
65cae43dc is described below

commit 65cae43dc76635471823c87e5a5a2d5e100cb8d2
Author: Vrishabh <ps...@gmail.com>
AuthorDate: Sat Aug 20 20:52:05 2022 +0530

    Refactoring DecimalBuilder constructors (#2517)
    
    * Refactor decimal builder
    
    * Fixing errors
    
    * refactor decimal256builder constructor
    
    * Capacity in terms of elements
    
    Co-authored-by: Raphael Taylor-Davies <r....@googlemail.com>
---
 arrow/benches/builder.rs                           |  5 ++--
 arrow/benches/cast_kernels.rs                      |  4 +--
 arrow/benches/decimal_validate.rs                  |  4 +--
 arrow/src/array/array_decimal.rs                   |  6 ++--
 arrow/src/array/builder/decimal_builder.rs         | 34 ++++++++++++++--------
 .../src/array/builder/fixed_size_binary_builder.rs | 16 ++++------
 arrow/src/array/builder/struct_builder.rs          |  6 ++--
 arrow/src/array/data.rs                            |  2 +-
 arrow/src/array/equal/mod.rs                       |  2 +-
 arrow/src/csv/reader.rs                            |  3 +-
 arrow/src/util/integration_util.rs                 |  6 ++--
 11 files changed, 49 insertions(+), 39 deletions(-)

diff --git a/arrow/benches/builder.rs b/arrow/benches/builder.rs
index b61423396..7651c9e67 100644
--- a/arrow/benches/builder.rs
+++ b/arrow/benches/builder.rs
@@ -112,7 +112,7 @@ fn bench_decimal128(c: &mut Criterion) {
     c.bench_function("bench_decimal128_builder", |b| {
         b.iter(|| {
             let mut rng = rand::thread_rng();
-            let mut decimal_builder = Decimal128Builder::new(BATCH_SIZE, 38, 0);
+            let mut decimal_builder = Decimal128Builder::with_capacity(BATCH_SIZE, 38, 0);
             for _ in 0..BATCH_SIZE {
                 decimal_builder
                     .append_value(rng.gen_range::<i128, _>(0..9999999999))
@@ -127,7 +127,8 @@ fn bench_decimal256(c: &mut Criterion) {
     c.bench_function("bench_decimal128_builder", |b| {
         b.iter(|| {
             let mut rng = rand::thread_rng();
-            let mut decimal_builder = Decimal256Builder::new(BATCH_SIZE, 76, 10);
+            let mut decimal_builder =
+                Decimal256Builder::with_capacity(BATCH_SIZE, 76, 10);
             for _ in 0..BATCH_SIZE {
                 decimal_builder
                     .append_value(&Decimal256::from(BigInt::from(
diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs
index a7bae35e7..4e52e7684 100644
--- a/arrow/benches/cast_kernels.rs
+++ b/arrow/benches/cast_kernels.rs
@@ -84,7 +84,7 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef {
 
 fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayRef {
     let mut rng = seedable_rng();
-    let mut builder = Decimal128Builder::new(size, precision, scale);
+    let mut builder = Decimal128Builder::with_capacity(size, precision, scale);
 
     for _ in 0..size {
         let _ = builder.append_value(rng.gen_range::<i128, _>(0..1000000000));
@@ -94,7 +94,7 @@ fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayR
 
 fn build_decimal256_array(size: usize, precision: usize, scale: usize) -> ArrayRef {
     let mut rng = seedable_rng();
-    let mut builder = Decimal256Builder::new(size, precision, scale);
+    let mut builder = Decimal256Builder::with_capacity(size, precision, scale);
     let mut bytes = [0; 32];
     for _ in 0..size {
         let num = rng.gen_range::<i128, _>(0..1000000000);
diff --git a/arrow/benches/decimal_validate.rs b/arrow/benches/decimal_validate.rs
index 1c9406abc..555373e4a 100644
--- a/arrow/benches/decimal_validate.rs
+++ b/arrow/benches/decimal_validate.rs
@@ -40,7 +40,7 @@ fn validate_decimal256_array(array: Decimal256Array) {
 fn validate_decimal128_benchmark(c: &mut Criterion) {
     let mut rng = rand::thread_rng();
     let size: i128 = 20000;
-    let mut decimal_builder = Decimal128Builder::new(size as usize, 38, 0);
+    let mut decimal_builder = Decimal128Builder::with_capacity(size as usize, 38, 0);
     for _ in 0..size {
         decimal_builder
             .append_value(rng.gen_range::<i128, _>(0..999999999999))
@@ -59,7 +59,7 @@ fn validate_decimal128_benchmark(c: &mut Criterion) {
 fn validate_decimal256_benchmark(c: &mut Criterion) {
     let mut rng = rand::thread_rng();
     let size: i128 = 20000;
-    let mut decimal_builder = Decimal256Builder::new(size as usize, 76, 0);
+    let mut decimal_builder = Decimal256Builder::with_capacity(size as usize, 76, 0);
     for _ in 0..size {
         let v = rng.gen_range::<i128, _>(0..999999999999999);
         let decimal = Decimal256::from_big_int(&BigInt::from(v), 76, 0).unwrap();
diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs
index 540024a92..aa25e66a9 100644
--- a/arrow/src/array/array_decimal.rs
+++ b/arrow/src/array/array_decimal.rs
@@ -571,7 +571,7 @@ mod tests {
     #[test]
     #[cfg(not(feature = "force_validate"))]
     fn test_decimal_append_error_value() {
-        let mut decimal_builder = Decimal128Builder::new(10, 5, 3);
+        let mut decimal_builder = Decimal128Builder::with_capacity(10, 5, 3);
         let mut result = decimal_builder.append_value(123456);
         let mut error = result.unwrap_err();
         assert_eq!(
@@ -588,7 +588,7 @@ mod tests {
         let arr = decimal_builder.finish();
         assert_eq!("12.345", arr.value_as_string(1));
 
-        decimal_builder = Decimal128Builder::new(10, 2, 1);
+        decimal_builder = Decimal128Builder::new(2, 1);
         result = decimal_builder.append_value(100);
         error = result.unwrap_err();
         assert_eq!(
@@ -884,7 +884,7 @@ mod tests {
 
     #[test]
     fn test_decimal256_iter() {
-        let mut builder = Decimal256Builder::new(30, 76, 6);
+        let mut builder = Decimal256Builder::with_capacity(30, 76, 6);
         let value = BigInt::from_str_radix("12345", 10).unwrap();
         let decimal1 = Decimal256::from_big_int(&value, 76, 6).unwrap();
         builder.append_value(&decimal1).unwrap();
diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs
index 988aba1ec..b4c877c71 100644
--- a/arrow/src/array/builder/decimal_builder.rs
+++ b/arrow/src/array/builder/decimal_builder.rs
@@ -61,9 +61,14 @@ pub struct Decimal256Builder {
 
 impl Decimal128Builder {
     const BYTE_LENGTH: i32 = 16;
-    /// Creates a new [`Decimal128Builder`], `capacity` is the number of bytes in the values
-    /// array
-    pub fn new(capacity: usize, precision: usize, scale: usize) -> Self {
+    /// Creates a new [`Decimal128Builder`]
+    pub fn new(precision: usize, scale: usize) -> Self {
+        Self::with_capacity(1024, precision, scale)
+    }
+
+    /// Creates a new [`Decimal128Builder`], `capacity` is the number of decimal values
+    /// that can be appended without reallocating
+    pub fn with_capacity(capacity: usize, precision: usize, scale: usize) -> Self {
         Self {
             builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH),
             precision,
@@ -155,9 +160,14 @@ impl ArrayBuilder for Decimal128Builder {
 
 impl Decimal256Builder {
     const BYTE_LENGTH: i32 = 32;
-    /// Creates a new [`Decimal256Builder`], `capacity` is the number of bytes in the values
-    /// array
-    pub fn new(capacity: usize, precision: usize, scale: usize) -> Self {
+    /// Creates a new [`Decimal256Builder`]
+    pub fn new(precision: usize, scale: usize) -> Self {
+        Self::with_capacity(1024, precision, scale)
+    }
+
+    /// Creates a new [`Decimal256Builder`], `capacity` is the number of decimal values
+    /// that can be appended without reallocating
+    pub fn with_capacity(capacity: usize, precision: usize, scale: usize) -> Self {
         Self {
             builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH),
             precision,
@@ -245,7 +255,7 @@ mod tests {
 
     #[test]
     fn test_decimal_builder() {
-        let mut builder = Decimal128Builder::new(30, 38, 6);
+        let mut builder = Decimal128Builder::new(38, 6);
 
         builder.append_value(8_887_000_000_i128).unwrap();
         builder.append_null();
@@ -263,7 +273,7 @@ mod tests {
 
     #[test]
     fn test_decimal_builder_with_decimal128() {
-        let mut builder = Decimal128Builder::new(30, 38, 6);
+        let mut builder = Decimal128Builder::new(38, 6);
 
         builder
             .append_value(Decimal128::new_from_i128(30, 38, 8_887_000_000_i128))
@@ -283,7 +293,7 @@ mod tests {
 
     #[test]
     fn test_decimal256_builder() {
-        let mut builder = Decimal256Builder::new(30, 40, 6);
+        let mut builder = Decimal256Builder::new(40, 6);
 
         let mut bytes = [0_u8; 32];
         bytes[0..16].clone_from_slice(&8_887_000_000_i128.to_le_bytes());
@@ -327,7 +337,7 @@ mod tests {
         expected = "Decimal value does not have the same precision or scale as Decimal256Builder"
     )]
     fn test_decimal256_builder_unmatched_precision_scale() {
-        let mut builder = Decimal256Builder::new(30, 10, 6);
+        let mut builder = Decimal256Builder::with_capacity(30, 10, 6);
 
         let mut bytes = [0_u8; 32];
         bytes[0..16].clone_from_slice(&8_887_000_000_i128.to_le_bytes());
@@ -340,7 +350,7 @@ mod tests {
         expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 75. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999"
     )]
     fn test_decimal256_builder_out_of_range_precision_scale() {
-        let mut builder = Decimal256Builder::new(30, 75, 6);
+        let mut builder = Decimal256Builder::new(75, 6);
 
         let big_value = BigInt::from_str_radix("9999999999999999999999999999999999999999999999999999999999999999999999999999", 10).unwrap();
         let value = Decimal256::from_big_int(&big_value, 75, 6).unwrap();
@@ -352,7 +362,7 @@ mod tests {
         expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 75. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999"
     )]
     fn test_decimal256_data_validation() {
-        let mut builder = Decimal256Builder::new(30, 75, 6);
+        let mut builder = Decimal256Builder::new(75, 6);
         // Disable validation at builder
         unsafe {
             builder.disable_value_validation();
diff --git a/arrow/src/array/builder/fixed_size_binary_builder.rs b/arrow/src/array/builder/fixed_size_binary_builder.rs
index f0483ccfb..30c25e0a6 100644
--- a/arrow/src/array/builder/fixed_size_binary_builder.rs
+++ b/arrow/src/array/builder/fixed_size_binary_builder.rs
@@ -38,8 +38,8 @@ impl FixedSizeBinaryBuilder {
         Self::with_capacity(1024, byte_width)
     }
 
-    /// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of bytes in the values
-    /// buffer
+    /// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of byte slices
+    /// that can be appended without reallocating
     pub fn with_capacity(capacity: usize, byte_width: i32) -> Self {
         assert!(
             byte_width >= 0,
@@ -47,12 +47,8 @@ impl FixedSizeBinaryBuilder {
             byte_width
         );
         Self {
-            values_builder: UInt8BufferBuilder::new(capacity),
-            null_buffer_builder: NullBufferBuilder::new(if byte_width > 0 {
-                capacity / byte_width as usize
-            } else {
-                0
-            }),
+            values_builder: UInt8BufferBuilder::new(capacity * byte_width as usize),
+            null_buffer_builder: NullBufferBuilder::new(capacity),
             value_length: byte_width,
         }
     }
@@ -137,7 +133,7 @@ mod tests {
 
     #[test]
     fn test_fixed_size_binary_builder() {
-        let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 5);
+        let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
 
         //  [b"hello", null, "arrow"]
         builder.append_value(b"hello").unwrap();
@@ -176,7 +172,7 @@ mod tests {
         expected = "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
     )]
     fn test_fixed_size_binary_builder_with_inconsistent_value_length() {
-        let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 4);
+        let mut builder = FixedSizeBinaryBuilder::with_capacity(1, 4);
         builder.append_value(b"hello").unwrap();
     }
     #[test]
diff --git a/arrow/src/array/builder/struct_builder.rs b/arrow/src/array/builder/struct_builder.rs
index be7fa1405..48932f199 100644
--- a/arrow/src/array/builder/struct_builder.rs
+++ b/arrow/src/array/builder/struct_builder.rs
@@ -111,9 +111,9 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
         DataType::FixedSizeBinary(len) => {
             Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len))
         }
-        DataType::Decimal128(precision, scale) => {
-            Box::new(Decimal128Builder::new(capacity, *precision, *scale))
-        }
+        DataType::Decimal128(precision, scale) => Box::new(
+            Decimal128Builder::with_capacity(capacity, *precision, *scale),
+        ),
         DataType::Utf8 => Box::new(StringBuilder::new(capacity)),
         DataType::Date32 => Box::new(Date32Builder::new(capacity)),
         DataType::Date64 => Box::new(Date64Builder::new(capacity)),
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 1a7f991ac..1ed3f01bb 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -2847,7 +2847,7 @@ mod tests {
 
     #[test]
     fn test_decimal_validation() {
-        let mut builder = Decimal128Builder::new(4, 10, 4);
+        let mut builder = Decimal128Builder::with_capacity(4, 10, 4);
         builder.append_value(10000).unwrap();
         builder.append_value(20000).unwrap();
         let array = builder.finish();
diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs
index dd32574bb..2984dad89 100644
--- a/arrow/src/array/equal/mod.rs
+++ b/arrow/src/array/equal/mod.rs
@@ -768,7 +768,7 @@ mod tests {
     fn create_fixed_size_binary_array<U: AsRef<[u8]>, T: AsRef<[Option<U>]>>(
         data: T,
     ) -> ArrayData {
-        let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 5);
+        let mut builder = FixedSizeBinaryBuilder::with_capacity(data.as_ref().len(), 5);
 
         for d in data.as_ref() {
             if let Some(v) = d {
diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs
index ac26b377f..5917f6797 100644
--- a/arrow/src/csv/reader.rs
+++ b/arrow/src/csv/reader.rs
@@ -699,7 +699,8 @@ fn build_decimal_array(
     precision: usize,
     scale: usize,
 ) -> Result<ArrayRef> {
-    let mut decimal_builder = Decimal128Builder::new(rows.len(), precision, scale);
+    let mut decimal_builder =
+        Decimal128Builder::with_capacity(rows.len(), precision, scale);
     for row in rows {
         let col_s = row.get(col_idx);
         match col_s {
diff --git a/arrow/src/util/integration_util.rs b/arrow/src/util/integration_util.rs
index 4a3cc2fcb..5268f55d9 100644
--- a/arrow/src/util/integration_util.rs
+++ b/arrow/src/util/integration_util.rs
@@ -776,7 +776,8 @@ pub fn array_from_json(
             }
         }
         DataType::Decimal128(precision, scale) => {
-            let mut b = Decimal128Builder::new(json_col.count, *precision, *scale);
+            let mut b =
+                Decimal128Builder::with_capacity(json_col.count, *precision, *scale);
             // C++ interop tests involve incompatible decimal values
             unsafe {
                 b.disable_value_validation();
@@ -798,7 +799,8 @@ pub fn array_from_json(
             Ok(Arc::new(b.finish()))
         }
         DataType::Decimal256(precision, scale) => {
-            let mut b = Decimal256Builder::new(json_col.count, *precision, *scale);
+            let mut b =
+                Decimal256Builder::with_capacity(json_col.count, *precision, *scale);
             // C++ interop tests involve incompatible decimal values
             unsafe {
                 b.disable_value_validation();