You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/08/20 15:22:10 UTC
[arrow-rs] branch master updated: Refactoring DecimalBuilder constructors (#2517)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 65cae43dc Refactoring DecimalBuilder constructors (#2517)
65cae43dc is described below
commit 65cae43dc76635471823c87e5a5a2d5e100cb8d2
Author: Vrishabh <ps...@gmail.com>
AuthorDate: Sat Aug 20 20:52:05 2022 +0530
Refactoring DecimalBuilder constructors (#2517)
* Refactor decimal builder
* Fixing errors
* refactor decimal256builder constructor
* Capacity in terms of elements
Co-authored-by: Raphael Taylor-Davies <r....@googlemail.com>
---
arrow/benches/builder.rs | 5 ++--
arrow/benches/cast_kernels.rs | 4 +--
arrow/benches/decimal_validate.rs | 4 +--
arrow/src/array/array_decimal.rs | 6 ++--
arrow/src/array/builder/decimal_builder.rs | 34 ++++++++++++++--------
.../src/array/builder/fixed_size_binary_builder.rs | 16 ++++------
arrow/src/array/builder/struct_builder.rs | 6 ++--
arrow/src/array/data.rs | 2 +-
arrow/src/array/equal/mod.rs | 2 +-
arrow/src/csv/reader.rs | 3 +-
arrow/src/util/integration_util.rs | 6 ++--
11 files changed, 49 insertions(+), 39 deletions(-)
diff --git a/arrow/benches/builder.rs b/arrow/benches/builder.rs
index b61423396..7651c9e67 100644
--- a/arrow/benches/builder.rs
+++ b/arrow/benches/builder.rs
@@ -112,7 +112,7 @@ fn bench_decimal128(c: &mut Criterion) {
c.bench_function("bench_decimal128_builder", |b| {
b.iter(|| {
let mut rng = rand::thread_rng();
- let mut decimal_builder = Decimal128Builder::new(BATCH_SIZE, 38, 0);
+ let mut decimal_builder = Decimal128Builder::with_capacity(BATCH_SIZE, 38, 0);
for _ in 0..BATCH_SIZE {
decimal_builder
.append_value(rng.gen_range::<i128, _>(0..9999999999))
@@ -127,7 +127,8 @@ fn bench_decimal256(c: &mut Criterion) {
c.bench_function("bench_decimal128_builder", |b| {
b.iter(|| {
let mut rng = rand::thread_rng();
- let mut decimal_builder = Decimal256Builder::new(BATCH_SIZE, 76, 10);
+ let mut decimal_builder =
+ Decimal256Builder::with_capacity(BATCH_SIZE, 76, 10);
for _ in 0..BATCH_SIZE {
decimal_builder
.append_value(&Decimal256::from(BigInt::from(
diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs
index a7bae35e7..4e52e7684 100644
--- a/arrow/benches/cast_kernels.rs
+++ b/arrow/benches/cast_kernels.rs
@@ -84,7 +84,7 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef {
fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayRef {
let mut rng = seedable_rng();
- let mut builder = Decimal128Builder::new(size, precision, scale);
+ let mut builder = Decimal128Builder::with_capacity(size, precision, scale);
for _ in 0..size {
let _ = builder.append_value(rng.gen_range::<i128, _>(0..1000000000));
@@ -94,7 +94,7 @@ fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayR
fn build_decimal256_array(size: usize, precision: usize, scale: usize) -> ArrayRef {
let mut rng = seedable_rng();
- let mut builder = Decimal256Builder::new(size, precision, scale);
+ let mut builder = Decimal256Builder::with_capacity(size, precision, scale);
let mut bytes = [0; 32];
for _ in 0..size {
let num = rng.gen_range::<i128, _>(0..1000000000);
diff --git a/arrow/benches/decimal_validate.rs b/arrow/benches/decimal_validate.rs
index 1c9406abc..555373e4a 100644
--- a/arrow/benches/decimal_validate.rs
+++ b/arrow/benches/decimal_validate.rs
@@ -40,7 +40,7 @@ fn validate_decimal256_array(array: Decimal256Array) {
fn validate_decimal128_benchmark(c: &mut Criterion) {
let mut rng = rand::thread_rng();
let size: i128 = 20000;
- let mut decimal_builder = Decimal128Builder::new(size as usize, 38, 0);
+ let mut decimal_builder = Decimal128Builder::with_capacity(size as usize, 38, 0);
for _ in 0..size {
decimal_builder
.append_value(rng.gen_range::<i128, _>(0..999999999999))
@@ -59,7 +59,7 @@ fn validate_decimal128_benchmark(c: &mut Criterion) {
fn validate_decimal256_benchmark(c: &mut Criterion) {
let mut rng = rand::thread_rng();
let size: i128 = 20000;
- let mut decimal_builder = Decimal256Builder::new(size as usize, 76, 0);
+ let mut decimal_builder = Decimal256Builder::with_capacity(size as usize, 76, 0);
for _ in 0..size {
let v = rng.gen_range::<i128, _>(0..999999999999999);
let decimal = Decimal256::from_big_int(&BigInt::from(v), 76, 0).unwrap();
diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs
index 540024a92..aa25e66a9 100644
--- a/arrow/src/array/array_decimal.rs
+++ b/arrow/src/array/array_decimal.rs
@@ -571,7 +571,7 @@ mod tests {
#[test]
#[cfg(not(feature = "force_validate"))]
fn test_decimal_append_error_value() {
- let mut decimal_builder = Decimal128Builder::new(10, 5, 3);
+ let mut decimal_builder = Decimal128Builder::with_capacity(10, 5, 3);
let mut result = decimal_builder.append_value(123456);
let mut error = result.unwrap_err();
assert_eq!(
@@ -588,7 +588,7 @@ mod tests {
let arr = decimal_builder.finish();
assert_eq!("12.345", arr.value_as_string(1));
- decimal_builder = Decimal128Builder::new(10, 2, 1);
+ decimal_builder = Decimal128Builder::new(2, 1);
result = decimal_builder.append_value(100);
error = result.unwrap_err();
assert_eq!(
@@ -884,7 +884,7 @@ mod tests {
#[test]
fn test_decimal256_iter() {
- let mut builder = Decimal256Builder::new(30, 76, 6);
+ let mut builder = Decimal256Builder::with_capacity(30, 76, 6);
let value = BigInt::from_str_radix("12345", 10).unwrap();
let decimal1 = Decimal256::from_big_int(&value, 76, 6).unwrap();
builder.append_value(&decimal1).unwrap();
diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs
index 988aba1ec..b4c877c71 100644
--- a/arrow/src/array/builder/decimal_builder.rs
+++ b/arrow/src/array/builder/decimal_builder.rs
@@ -61,9 +61,14 @@ pub struct Decimal256Builder {
impl Decimal128Builder {
const BYTE_LENGTH: i32 = 16;
- /// Creates a new [`Decimal128Builder`], `capacity` is the number of bytes in the values
- /// array
- pub fn new(capacity: usize, precision: usize, scale: usize) -> Self {
+ /// Creates a new [`Decimal128Builder`]
+ pub fn new(precision: usize, scale: usize) -> Self {
+ Self::with_capacity(1024, precision, scale)
+ }
+
+ /// Creates a new [`Decimal128Builder`], `capacity` is the number of decimal values
+ /// that can be appended without reallocating
+ pub fn with_capacity(capacity: usize, precision: usize, scale: usize) -> Self {
Self {
builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH),
precision,
@@ -155,9 +160,14 @@ impl ArrayBuilder for Decimal128Builder {
impl Decimal256Builder {
const BYTE_LENGTH: i32 = 32;
- /// Creates a new [`Decimal256Builder`], `capacity` is the number of bytes in the values
- /// array
- pub fn new(capacity: usize, precision: usize, scale: usize) -> Self {
+ /// Creates a new [`Decimal256Builder`]
+ pub fn new(precision: usize, scale: usize) -> Self {
+ Self::with_capacity(1024, precision, scale)
+ }
+
+ /// Creates a new [`Decimal256Builder`], `capacity` is the number of decimal values
+ /// that can be appended without reallocating
+ pub fn with_capacity(capacity: usize, precision: usize, scale: usize) -> Self {
Self {
builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH),
precision,
@@ -245,7 +255,7 @@ mod tests {
#[test]
fn test_decimal_builder() {
- let mut builder = Decimal128Builder::new(30, 38, 6);
+ let mut builder = Decimal128Builder::new(38, 6);
builder.append_value(8_887_000_000_i128).unwrap();
builder.append_null();
@@ -263,7 +273,7 @@ mod tests {
#[test]
fn test_decimal_builder_with_decimal128() {
- let mut builder = Decimal128Builder::new(30, 38, 6);
+ let mut builder = Decimal128Builder::new(38, 6);
builder
.append_value(Decimal128::new_from_i128(30, 38, 8_887_000_000_i128))
@@ -283,7 +293,7 @@ mod tests {
#[test]
fn test_decimal256_builder() {
- let mut builder = Decimal256Builder::new(30, 40, 6);
+ let mut builder = Decimal256Builder::new(40, 6);
let mut bytes = [0_u8; 32];
bytes[0..16].clone_from_slice(&8_887_000_000_i128.to_le_bytes());
@@ -327,7 +337,7 @@ mod tests {
expected = "Decimal value does not have the same precision or scale as Decimal256Builder"
)]
fn test_decimal256_builder_unmatched_precision_scale() {
- let mut builder = Decimal256Builder::new(30, 10, 6);
+ let mut builder = Decimal256Builder::with_capacity(30, 10, 6);
let mut bytes = [0_u8; 32];
bytes[0..16].clone_from_slice(&8_887_000_000_i128.to_le_bytes());
@@ -340,7 +350,7 @@ mod tests {
expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 75. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999"
)]
fn test_decimal256_builder_out_of_range_precision_scale() {
- let mut builder = Decimal256Builder::new(30, 75, 6);
+ let mut builder = Decimal256Builder::new(75, 6);
let big_value = BigInt::from_str_radix("9999999999999999999999999999999999999999999999999999999999999999999999999999", 10).unwrap();
let value = Decimal256::from_big_int(&big_value, 75, 6).unwrap();
@@ -352,7 +362,7 @@ mod tests {
expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 75. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999"
)]
fn test_decimal256_data_validation() {
- let mut builder = Decimal256Builder::new(30, 75, 6);
+ let mut builder = Decimal256Builder::new(75, 6);
// Disable validation at builder
unsafe {
builder.disable_value_validation();
diff --git a/arrow/src/array/builder/fixed_size_binary_builder.rs b/arrow/src/array/builder/fixed_size_binary_builder.rs
index f0483ccfb..30c25e0a6 100644
--- a/arrow/src/array/builder/fixed_size_binary_builder.rs
+++ b/arrow/src/array/builder/fixed_size_binary_builder.rs
@@ -38,8 +38,8 @@ impl FixedSizeBinaryBuilder {
Self::with_capacity(1024, byte_width)
}
- /// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of bytes in the values
- /// buffer
+ /// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of byte slices
+ /// that can be appended without reallocating
pub fn with_capacity(capacity: usize, byte_width: i32) -> Self {
assert!(
byte_width >= 0,
@@ -47,12 +47,8 @@ impl FixedSizeBinaryBuilder {
byte_width
);
Self {
- values_builder: UInt8BufferBuilder::new(capacity),
- null_buffer_builder: NullBufferBuilder::new(if byte_width > 0 {
- capacity / byte_width as usize
- } else {
- 0
- }),
+ values_builder: UInt8BufferBuilder::new(capacity * byte_width as usize),
+ null_buffer_builder: NullBufferBuilder::new(capacity),
value_length: byte_width,
}
}
@@ -137,7 +133,7 @@ mod tests {
#[test]
fn test_fixed_size_binary_builder() {
- let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 5);
+ let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
// [b"hello", null, "arrow"]
builder.append_value(b"hello").unwrap();
@@ -176,7 +172,7 @@ mod tests {
expected = "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
)]
fn test_fixed_size_binary_builder_with_inconsistent_value_length() {
- let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 4);
+ let mut builder = FixedSizeBinaryBuilder::with_capacity(1, 4);
builder.append_value(b"hello").unwrap();
}
#[test]
diff --git a/arrow/src/array/builder/struct_builder.rs b/arrow/src/array/builder/struct_builder.rs
index be7fa1405..48932f199 100644
--- a/arrow/src/array/builder/struct_builder.rs
+++ b/arrow/src/array/builder/struct_builder.rs
@@ -111,9 +111,9 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
DataType::FixedSizeBinary(len) => {
Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len))
}
- DataType::Decimal128(precision, scale) => {
- Box::new(Decimal128Builder::new(capacity, *precision, *scale))
- }
+ DataType::Decimal128(precision, scale) => Box::new(
+ Decimal128Builder::with_capacity(capacity, *precision, *scale),
+ ),
DataType::Utf8 => Box::new(StringBuilder::new(capacity)),
DataType::Date32 => Box::new(Date32Builder::new(capacity)),
DataType::Date64 => Box::new(Date64Builder::new(capacity)),
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 1a7f991ac..1ed3f01bb 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -2847,7 +2847,7 @@ mod tests {
#[test]
fn test_decimal_validation() {
- let mut builder = Decimal128Builder::new(4, 10, 4);
+ let mut builder = Decimal128Builder::with_capacity(4, 10, 4);
builder.append_value(10000).unwrap();
builder.append_value(20000).unwrap();
let array = builder.finish();
diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs
index dd32574bb..2984dad89 100644
--- a/arrow/src/array/equal/mod.rs
+++ b/arrow/src/array/equal/mod.rs
@@ -768,7 +768,7 @@ mod tests {
fn create_fixed_size_binary_array<U: AsRef<[u8]>, T: AsRef<[Option<U>]>>(
data: T,
) -> ArrayData {
- let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 5);
+ let mut builder = FixedSizeBinaryBuilder::with_capacity(data.as_ref().len(), 5);
for d in data.as_ref() {
if let Some(v) = d {
diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs
index ac26b377f..5917f6797 100644
--- a/arrow/src/csv/reader.rs
+++ b/arrow/src/csv/reader.rs
@@ -699,7 +699,8 @@ fn build_decimal_array(
precision: usize,
scale: usize,
) -> Result<ArrayRef> {
- let mut decimal_builder = Decimal128Builder::new(rows.len(), precision, scale);
+ let mut decimal_builder =
+ Decimal128Builder::with_capacity(rows.len(), precision, scale);
for row in rows {
let col_s = row.get(col_idx);
match col_s {
diff --git a/arrow/src/util/integration_util.rs b/arrow/src/util/integration_util.rs
index 4a3cc2fcb..5268f55d9 100644
--- a/arrow/src/util/integration_util.rs
+++ b/arrow/src/util/integration_util.rs
@@ -776,7 +776,8 @@ pub fn array_from_json(
}
}
DataType::Decimal128(precision, scale) => {
- let mut b = Decimal128Builder::new(json_col.count, *precision, *scale);
+ let mut b =
+ Decimal128Builder::with_capacity(json_col.count, *precision, *scale);
// C++ interop tests involve incompatible decimal values
unsafe {
b.disable_value_validation();
@@ -798,7 +799,8 @@ pub fn array_from_json(
Ok(Arc::new(b.finish()))
}
DataType::Decimal256(precision, scale) => {
- let mut b = Decimal256Builder::new(json_col.count, *precision, *scale);
+ let mut b =
+ Decimal256Builder::with_capacity(json_col.count, *precision, *scale);
// C++ interop tests involve incompatible decimal values
unsafe {
b.disable_value_validation();