You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/02/08 20:10:49 UTC
[arrow-rs] branch master updated: `DecimalArray` API ergonomics: add iter(), create from iter(), change precision / scale (#1223)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 35e16be `DecimalArray` API ergonomics: add iter(), create from iter(), change precision / scale (#1223)
35e16be is described below
commit 35e16be01e680e9381b2d1393c2e3f8e7acb7b13
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Tue Feb 8 15:10:14 2022 -0500
`DecimalArray` API ergonomics: add iter(), create from iter(), change precision / scale (#1223)
* DecimalArray: create from iter, iter(), docs
* Add with_precision and scale
* Implement iter() and into_iter() for DecimalArray
* Clean up and tests
* Return Result rather than panic
* Refactor error handling into separate function
* Validate data in `with_precision_and_scale`!
* Use named constant values
* clippy
---
arrow/src/array/array_binary.rs | 307 ++++++++++++++++++++++++++++++++++++--
arrow/src/array/builder.rs | 90 +----------
arrow/src/array/data.rs | 21 +++
arrow/src/array/mod.rs | 2 -
arrow/src/compute/kernels/cast.rs | 5 +-
arrow/src/csv/reader.rs | 23 ++-
arrow/src/datatypes/datatype.rs | 124 ++++++++++++++-
7 files changed, 452 insertions(+), 120 deletions(-)
diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs
index 46c7066..7496e24 100644
--- a/arrow/src/array/array_binary.rs
+++ b/arrow/src/array/array_binary.rs
@@ -15,16 +15,22 @@
// specific language governing permissions and limitations
// under the License.
+use std::borrow::Borrow;
use std::convert::{From, TryInto};
use std::fmt;
use std::{any::Any, iter::FromIterator};
+use super::BooleanBufferBuilder;
use super::{
array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData,
FixedSizeListArray, GenericBinaryIter, GenericListArray, OffsetSizeTrait,
};
use crate::buffer::Buffer;
-use crate::error::ArrowError;
+use crate::datatypes::{
+ validate_decimal_precision, DECIMAL_DEFAULT_SCALE, DECIMAL_MAX_PRECISION,
+ DECIMAL_MAX_SCALE,
+};
+use crate::error::{ArrowError, Result};
use crate::util::bit_util;
use crate::{buffer::MutableBuffer, datatypes::DataType};
@@ -491,7 +497,7 @@ impl FixedSizeBinaryArray {
/// # Errors
///
/// Returns error if argument has length zero, or sizes of nested slices don't match.
- pub fn try_from_sparse_iter<T, U>(mut iter: T) -> Result<Self, ArrowError>
+ pub fn try_from_sparse_iter<T, U>(mut iter: T) -> Result<Self>
where
T: Iterator<Item = Option<U>>,
U: AsRef<[u8]>,
@@ -502,7 +508,7 @@ impl FixedSizeBinaryArray {
let mut null_buf = MutableBuffer::from_len_zeroed(0);
let mut buffer = MutableBuffer::from_len_zeroed(0);
let mut prepend = 0;
- iter.try_for_each(|item| -> Result<(), ArrowError> {
+ iter.try_for_each(|item| -> Result<()> {
// extend null bitmask by one byte per each 8 items
if byte == 0 {
null_buf.push(0u8);
@@ -575,7 +581,7 @@ impl FixedSizeBinaryArray {
/// # Errors
///
/// Returns error if argument has length zero, or sizes of nested slices don't match.
- pub fn try_from_iter<T, U>(mut iter: T) -> Result<Self, ArrowError>
+ pub fn try_from_iter<T, U>(mut iter: T) -> Result<Self>
where
T: Iterator<Item = U>,
U: AsRef<[u8]>,
@@ -583,7 +589,7 @@ impl FixedSizeBinaryArray {
let mut len = 0;
let mut size = None;
let mut buffer = MutableBuffer::from_len_zeroed(0);
- iter.try_for_each(|item| -> Result<(), ArrowError> {
+ iter.try_for_each(|item| -> Result<()> {
let slice = item.as_ref();
if let Some(size) = size {
if size != slice.len() {
@@ -690,19 +696,28 @@ impl Array for FixedSizeBinaryArray {
}
}
-/// A type of `DecimalArray` whose elements are binaries.
+/// `DecimalArray` stores fixed width decimal numbers,
+/// with a fixed precision and scale.
///
/// # Examples
///
/// ```
/// use arrow::array::{Array, DecimalArray, DecimalBuilder};
/// use arrow::datatypes::DataType;
-/// let mut builder = DecimalBuilder::new(30, 23, 6);
///
-/// builder.append_value(8_887_000_000).unwrap();
-/// builder.append_null().unwrap();
-/// builder.append_value(-8_887_000_000).unwrap();
-/// let decimal_array: DecimalArray = builder.finish();
+/// // Create a DecimalArray with the default precision and scale
+/// let decimal_array: DecimalArray = vec![
+/// Some(8_887_000_000),
+/// None,
+/// Some(-8_887_000_000),
+/// ]
+/// .into_iter().collect();
+///
+/// // set precision and scale so values are interpreted
+/// // as `8887.000000`, `Null`, and `-8887.000000`
+/// let decimal_array = decimal_array
+/// .with_precision_and_scale(23, 6)
+/// .unwrap();
///
/// assert_eq!(&DataType::Decimal(23, 6), decimal_array.data_type());
/// assert_eq!(8_887_000_000, decimal_array.value(0));
@@ -816,13 +831,93 @@ impl DecimalArray {
let array_data = unsafe { builder.build_unchecked() };
Self::from(array_data)
}
+
+ /// Creates a [DecimalArray] with default precision and scale,
+ /// based on an iterator of `i128` values without nulls
+ pub fn from_iter_values<I: IntoIterator<Item = i128>>(iter: I) -> Self {
+ let val_buf: Buffer = iter.into_iter().collect();
+ let data = unsafe {
+ ArrayData::new_unchecked(
+ Self::default_type(),
+ val_buf.len() / std::mem::size_of::<i128>(),
+ None,
+ None,
+ 0,
+ vec![val_buf],
+ vec![],
+ )
+ };
+ DecimalArray::from(data)
+ }
+
+ /// Return the precision (total digits) that can be stored by this array
pub fn precision(&self) -> usize {
self.precision
}
+ /// Return the scale (digits after the decimal) that can be stored by this array
pub fn scale(&self) -> usize {
self.scale
}
+
+ /// Returns a DecimalArray with the same data as self, with the
+ /// specified precision and scale.
+ ///
+ /// Returns an Error if:
+ /// 1. `precision` is larger than [`DECIMAL_MAX_PRECISION`]
+ /// 2. `scale` is larger than [`DECIMAL_MAX_SCALE`]
+ /// 3. `scale` is > `precision`
+ pub fn with_precision_and_scale(
+ mut self,
+ precision: usize,
+ scale: usize,
+ ) -> Result<Self> {
+ if precision > DECIMAL_MAX_PRECISION {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "precision {} is greater than max {}",
+ precision, DECIMAL_MAX_PRECISION
+ )));
+ }
+ if scale > DECIMAL_MAX_SCALE {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "scale {} is greater than max {}",
+ scale, DECIMAL_MAX_SCALE
+ )));
+ }
+ if scale > precision {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "scale {} is greater than precision {}",
+ scale, precision
+ )));
+ }
+
+ // Ensure that all values are within the requested
+ // precision. For performance, only check if the precision is
+ // decreased
+ if precision < self.precision {
+ for v in self.iter().flatten() {
+ validate_decimal_precision(v, precision)?;
+ }
+ }
+
+ assert_eq!(
+ self.data.data_type(),
+ &DataType::Decimal(self.precision, self.scale)
+ );
+
+ // safety: self.data is valid DataType::Decimal as checked above
+ let new_data_type = DataType::Decimal(precision, scale);
+ self.precision = precision;
+ self.scale = scale;
+ self.data = self.data.with_data_type(new_data_type);
+ Ok(self)
+ }
+
+ /// The default precision and scale used when not specified.
+ pub fn default_type() -> DataType {
+ // Keep maximum precision
+ DataType::Decimal(DECIMAL_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
+ }
}
impl From<ArrayData> for DecimalArray {
@@ -848,6 +943,97 @@ impl From<ArrayData> for DecimalArray {
}
}
+/// an iterator that returns Some(i128) or None, that can be used on a
+/// DecimalArray
+#[derive(Debug)]
+pub struct DecimalIter<'a> {
+ array: &'a DecimalArray,
+ current: usize,
+ current_end: usize,
+}
+
+impl<'a> DecimalIter<'a> {
+ pub fn new(array: &'a DecimalArray) -> Self {
+ Self {
+ array,
+ current: 0,
+ current_end: array.len(),
+ }
+ }
+}
+
+impl<'a> std::iter::Iterator for DecimalIter<'a> {
+ type Item = Option<i128>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.current == self.current_end {
+ None
+ } else {
+ let old = self.current;
+ self.current += 1;
+ // TODO: Improve performance by avoiding bounds check here
+ // (by adding a `value_unchecked` method, for example)
+ if self.array.is_null(old) {
+ Some(None)
+ } else {
+ Some(Some(self.array.value(old)))
+ }
+ }
+ }
+}
+
+impl<'a> IntoIterator for &'a DecimalArray {
+ type Item = Option<i128>;
+ type IntoIter = DecimalIter<'a>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ DecimalIter::<'a>::new(self)
+ }
+}
+
+impl<'a> DecimalArray {
+ /// constructs a new iterator
+ pub fn iter(&'a self) -> DecimalIter<'a> {
+ DecimalIter::new(self)
+ }
+}
+
+impl<Ptr: Borrow<Option<i128>>> FromIterator<Ptr> for DecimalArray {
+ fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
+ let iter = iter.into_iter();
+ let (lower, upper) = iter.size_hint();
+ let size_hint = upper.unwrap_or(lower);
+
+ let mut null_buf = BooleanBufferBuilder::new(size_hint);
+
+ let buffer: Buffer = iter
+ .map(|item| {
+ if let Some(a) = item.borrow() {
+ null_buf.append(true);
+ *a
+ } else {
+ null_buf.append(false);
+ // arbitrary value for NULL
+ 0
+ }
+ })
+ .collect();
+
+ let data = unsafe {
+ ArrayData::new_unchecked(
+ Self::default_type(),
+ null_buf.len(),
+ None,
+ Some(null_buf.into()),
+ 0,
+ vec![buffer],
+ vec![],
+ )
+ };
+ DecimalArray::from(data)
+ }
+}
+
impl fmt::Debug for DecimalArray {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "DecimalArray<{}, {}>\n[\n", self.precision, self.scale)?;
@@ -1316,7 +1502,7 @@ mod tests {
let mut result = decimal_builder.append_value(123456);
let mut error = result.unwrap_err();
assert_eq!(
- "Invalid argument error: The value of 123456 i128 is not compatible with Decimal(5,3)",
+ "Invalid argument error: 123456 is too large to store in a Decimal of precision 5. Max is 99999",
error.to_string()
);
decimal_builder.append_value(12345).unwrap();
@@ -1327,14 +1513,14 @@ mod tests {
result = decimal_builder.append_value(100);
error = result.unwrap_err();
assert_eq!(
- "Invalid argument error: The value of 100 i128 is not compatible with Decimal(2,1)",
+ "Invalid argument error: 100 is too large to store in a Decimal of precision 2. Max is 99",
error.to_string()
);
decimal_builder.append_value(99).unwrap();
result = decimal_builder.append_value(-100);
error = result.unwrap_err();
assert_eq!(
- "Invalid argument error: The value of -100 i128 is not compatible with Decimal(2,1)",
+ "Invalid argument error: -100 is too small to store in a Decimal of precision 2. Min is -99",
error.to_string()
);
decimal_builder.append_value(-99).unwrap();
@@ -1342,6 +1528,48 @@ mod tests {
assert_eq!("9.9", arr.value_as_string(0));
assert_eq!("-9.9", arr.value_as_string(1));
}
+ #[test]
+ fn test_decimal_from_iter_values() {
+ let array = DecimalArray::from_iter_values(vec![-100, 0, 101].into_iter());
+ assert_eq!(array.len(), 3);
+ assert_eq!(array.data_type(), &DataType::Decimal(38, 10));
+ assert_eq!(-100, array.value(0));
+ assert!(!array.is_null(0));
+ assert_eq!(0, array.value(1));
+ assert!(!array.is_null(1));
+ assert_eq!(101, array.value(2));
+ assert!(!array.is_null(2));
+ }
+
+ #[test]
+ fn test_decimal_from_iter() {
+ let array: DecimalArray = vec![Some(-100), None, Some(101)].into_iter().collect();
+ assert_eq!(array.len(), 3);
+ assert_eq!(array.data_type(), &DataType::Decimal(38, 10));
+ assert_eq!(-100, array.value(0));
+ assert!(!array.is_null(0));
+ assert!(array.is_null(1));
+ assert_eq!(101, array.value(2));
+ assert!(!array.is_null(2));
+ }
+
+ #[test]
+ fn test_decimal_iter() {
+ let data = vec![Some(-100), None, Some(101)];
+ let array: DecimalArray = data.clone().into_iter().collect();
+
+ let collected: Vec<_> = array.iter().collect();
+ assert_eq!(data, collected);
+ }
+
+ #[test]
+ fn test_decimal_into_iter() {
+ let data = vec![Some(-100), None, Some(101)];
+ let array: DecimalArray = data.clone().into_iter().collect();
+
+ let collected: Vec<_> = array.iter().collect();
+ assert_eq!(data, collected);
+ }
#[test]
fn test_decimal_array_value_as_string() {
@@ -1361,6 +1589,57 @@ mod tests {
}
#[test]
+ fn test_decimal_array_with_precision_and_scale() {
+ let arr = DecimalArray::from_iter_values([12345, 456, 7890, -123223423432432])
+ .with_precision_and_scale(20, 2)
+ .unwrap();
+
+ assert_eq!(arr.data_type(), &DataType::Decimal(20, 2));
+ assert_eq!(arr.precision(), 20);
+ assert_eq!(arr.scale(), 2);
+
+ let actual: Vec<_> = (0..arr.len()).map(|i| arr.value_as_string(i)).collect();
+ let expected = vec!["123.45", "4.56", "78.90", "-1232234234324.32"];
+
+ assert_eq!(actual, expected);
+ }
+
+ #[test]
+ #[should_panic(
+ expected = "-123223423432432 is too small to store in a Decimal of precision 5. Min is -99999"
+ )]
+ fn test_decimal_array_with_precision_and_scale_out_of_range() {
+ DecimalArray::from_iter_values([12345, 456, 7890, -123223423432432])
+ // precision is too small to hold value
+ .with_precision_and_scale(5, 2)
+ .unwrap();
+ }
+
+ #[test]
+ #[should_panic(expected = "precision 40 is greater than max 38")]
+ fn test_decimal_array_with_precision_and_scale_invalid_precision() {
+ DecimalArray::from_iter_values([12345, 456])
+ .with_precision_and_scale(40, 2)
+ .unwrap();
+ }
+
+ #[test]
+ #[should_panic(expected = "scale 40 is greater than max 38")]
+ fn test_decimal_array_with_precision_and_scale_invalid_scale() {
+ DecimalArray::from_iter_values([12345, 456])
+ .with_precision_and_scale(20, 40)
+ .unwrap();
+ }
+
+ #[test]
+ #[should_panic(expected = "scale 10 is greater than precision 4")]
+ fn test_decimal_array_with_precision_and_scale_invalid_precision_and_scale() {
+ DecimalArray::from_iter_values([12345, 456])
+ .with_precision_and_scale(4, 10)
+ .unwrap();
+ }
+
+ #[test]
fn test_decimal_array_fmt_debug() {
let values: Vec<i128> = vec![8887000000, -8887000000];
let mut decimal_builder = DecimalBuilder::new(3, 23, 6);
diff --git a/arrow/src/array/builder.rs b/arrow/src/array/builder.rs
index 18deac3..fd94eaa 100644
--- a/arrow/src/array/builder.rs
+++ b/arrow/src/array/builder.rs
@@ -1153,87 +1153,6 @@ pub struct FixedSizeBinaryBuilder {
builder: FixedSizeListBuilder<UInt8Builder>,
}
-pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
- 9,
- 99,
- 999,
- 9999,
- 99999,
- 999999,
- 9999999,
- 99999999,
- 999999999,
- 9999999999,
- 99999999999,
- 999999999999,
- 9999999999999,
- 99999999999999,
- 999999999999999,
- 9999999999999999,
- 99999999999999999,
- 999999999999999999,
- 9999999999999999999,
- 99999999999999999999,
- 999999999999999999999,
- 9999999999999999999999,
- 99999999999999999999999,
- 999999999999999999999999,
- 9999999999999999999999999,
- 99999999999999999999999999,
- 999999999999999999999999999,
- 9999999999999999999999999999,
- 99999999999999999999999999999,
- 999999999999999999999999999999,
- 9999999999999999999999999999999,
- 99999999999999999999999999999999,
- 999999999999999999999999999999999,
- 9999999999999999999999999999999999,
- 99999999999999999999999999999999999,
- 999999999999999999999999999999999999,
- 9999999999999999999999999999999999999,
- 170141183460469231731687303715884105727,
-];
-pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
- -9,
- -99,
- -999,
- -9999,
- -99999,
- -999999,
- -9999999,
- -99999999,
- -999999999,
- -9999999999,
- -99999999999,
- -999999999999,
- -9999999999999,
- -99999999999999,
- -999999999999999,
- -9999999999999999,
- -99999999999999999,
- -999999999999999999,
- -9999999999999999999,
- -99999999999999999999,
- -999999999999999999999,
- -9999999999999999999999,
- -99999999999999999999999,
- -999999999999999999999999,
- -9999999999999999999999999,
- -99999999999999999999999999,
- -999999999999999999999999999,
- -9999999999999999999999999999,
- -99999999999999999999999999999,
- -999999999999999999999999999999,
- -9999999999999999999999999999999,
- -99999999999999999999999999999999,
- -999999999999999999999999999999999,
- -9999999999999999999999999999999999,
- -99999999999999999999999999999999999,
- -999999999999999999999999999999999999,
- -9999999999999999999999999999999999999,
- -170141183460469231731687303715884105728,
-];
-
///
/// Array Builder for [`DecimalArray`]
///
@@ -1547,14 +1466,7 @@ impl DecimalBuilder {
/// distinct array element.
#[inline]
pub fn append_value(&mut self, value: i128) -> Result<()> {
- if value > MAX_DECIMAL_FOR_EACH_PRECISION[self.precision - 1]
- || value < MIN_DECIMAL_FOR_EACH_PRECISION[self.precision - 1]
- {
- return Err(ArrowError::InvalidArgumentError(format!(
- "The value of {} i128 is not compatible with Decimal({},{})",
- value, self.precision, self.scale
- )));
- }
+ let value = validate_decimal_precision(value, self.precision)?;
let value_as_bytes = Self::from_i128_to_fixed_size_bytes(
value,
self.builder.value_length() as usize,
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 53cc633..0169d28 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -357,6 +357,27 @@ impl ArrayData {
&self.data_type
}
+ /// Updates the [DataType] of this ArrayData.
+ ///
+ /// Panics if the new DataType is not compatible with the
+ /// existing type.
+ ///
+ /// Note: currently only changing a [DataType::Decimal]'s precision
+ /// and scale is supported
+ #[inline]
+ pub(crate) fn with_data_type(mut self, new_data_type: DataType) -> Self {
+ assert!(
+ matches!(self.data_type, DataType::Decimal(_, _)),
+ "only DecimalType is supported for existing type"
+ );
+ assert!(
+ matches!(new_data_type, DataType::Decimal(_, _)),
+ "only DecimalType is supported for new datatype"
+ );
+ self.data_type = new_data_type;
+ self
+ }
+
/// Returns a slice of buffers for this array data
pub fn buffers(&self) -> &[Buffer] {
&self.buffers[..]
diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs
index c10b380..6929bfd 100644
--- a/arrow/src/array/mod.rs
+++ b/arrow/src/array/mod.rs
@@ -465,8 +465,6 @@ pub use self::builder::StringBuilder;
pub use self::builder::StringDictionaryBuilder;
pub use self::builder::StructBuilder;
pub use self::builder::UnionBuilder;
-pub use self::builder::MAX_DECIMAL_FOR_EACH_PRECISION;
-pub use self::builder::MIN_DECIMAL_FOR_EACH_PRECISION;
pub type Int8Builder = PrimitiveBuilder<Int8Type>;
pub type Int16Builder = PrimitiveBuilder<Int16Type>;
diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs
index a3b4d6b..e3900aa 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -2116,7 +2116,8 @@ mod tests {
let array = Arc::new(input_decimal_array) as ArrayRef;
let result = cast(&array, &DataType::Decimal(2, 2));
assert!(result.is_err());
- assert_eq!("Invalid argument error: The value of 12345600 i128 is not compatible with Decimal(2,2)".to_string(), result.unwrap_err().to_string());
+ assert_eq!("Invalid argument error: 12345600 is too large to store in a Decimal of precision 2. Max is 99",
+ result.unwrap_err().to_string());
}
#[test]
@@ -2297,7 +2298,7 @@ mod tests {
let array = Arc::new(array) as ArrayRef;
let casted_array = cast(&array, &DataType::Decimal(3, 1));
assert!(casted_array.is_err());
- assert_eq!("Invalid argument error: The value of 1000 i128 is not compatible with Decimal(3,1)", casted_array.unwrap_err().to_string());
+ assert_eq!("Invalid argument error: 1000 is too large to store in a Decimal of precision 3. Max is 999", casted_array.unwrap_err().to_string());
// test f32 to decimal type
let array = Float32Array::from(vec![
diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs
index 4d213a2..03a45ab 100644
--- a/arrow/src/csv/reader.rs
+++ b/arrow/src/csv/reader.rs
@@ -51,7 +51,6 @@ use std::sync::Arc;
use crate::array::{
ArrayRef, BooleanArray, DecimalBuilder, DictionaryArray, PrimitiveArray, StringArray,
- MAX_DECIMAL_FOR_EACH_PRECISION, MIN_DECIMAL_FOR_EACH_PRECISION,
};
use crate::compute::kernels::cast_utils::string_to_timestamp_nanos;
use crate::datatypes::*;
@@ -900,15 +899,8 @@ fn parse_decimal_with_parameter(s: &str, precision: usize, scale: usize) -> Resu
if negative {
result = result.neg();
}
- if result > MAX_DECIMAL_FOR_EACH_PRECISION[precision - 1]
- || result < MIN_DECIMAL_FOR_EACH_PRECISION[precision - 1]
- {
- return Err(ArrowError::ParseError(format!(
- "parse decimal overflow, the precision {}, the scale {}, the value {}",
- precision, scale, s
- )));
- }
- Ok(result)
+ validate_decimal_precision(result, precision)
+ .map_err(|e| ArrowError::ParseError(format!("parse decimal overflow: {}", e)))
} else {
Err(ArrowError::ParseError(format!(
"can't parse the string value {} to decimal",
@@ -1766,8 +1758,15 @@ mod tests {
let overflow_parse_tests = ["12345678", "12345678.9", "99999999.99"];
for s in overflow_parse_tests {
let result = parse_decimal_with_parameter(s, 10, 3);
- assert_eq!(format!(
- "Parser error: parse decimal overflow, the precision {}, the scale {}, the value {}", 10,3, s),result.unwrap_err().to_string());
+ let expected = "Parser error: parse decimal overflow";
+ let actual = result.unwrap_err().to_string();
+
+ assert!(
+ actual.contains(&expected),
+ "actual: '{}', expected: '{}'",
+ actual,
+ expected
+ );
}
}
diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs
index 3653ebb..e6f5461 100644
--- a/arrow/src/datatypes/datatype.rs
+++ b/arrow/src/datatypes/datatype.rs
@@ -127,7 +127,12 @@ pub enum DataType {
/// This type mostly used to represent low cardinality string
/// arrays or a limited set of primitive types as integers.
Dictionary(Box<DataType>, Box<DataType>),
- /// Decimal value with precision and scale
+ /// Exact decimal value with precision and scale
+ ///
+ /// * precision is the total number of digits
+ /// * scale is the number of digits past the decimal
+ ///
+ /// For example the number 123.45 has precision 5 and scale 2.
Decimal(usize, usize),
/// A Map is a logical nested type that is represented as
///
@@ -189,6 +194,123 @@ impl fmt::Display for DataType {
}
}
+/// `MAX_DECIMAL_FOR_EACH_PRECISION[p - 1]` holds the maximum `i128` value
+/// that can be stored in a [DataType::Decimal] value of precision `p`
+pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
+ 9,
+ 99,
+ 999,
+ 9999,
+ 99999,
+ 999999,
+ 9999999,
+ 99999999,
+ 999999999,
+ 9999999999,
+ 99999999999,
+ 999999999999,
+ 9999999999999,
+ 99999999999999,
+ 999999999999999,
+ 9999999999999999,
+ 99999999999999999,
+ 999999999999999999,
+ 9999999999999999999,
+ 99999999999999999999,
+ 999999999999999999999,
+ 9999999999999999999999,
+ 99999999999999999999999,
+ 999999999999999999999999,
+ 9999999999999999999999999,
+ 99999999999999999999999999,
+ 999999999999999999999999999,
+ 9999999999999999999999999999,
+ 99999999999999999999999999999,
+ 999999999999999999999999999999,
+ 9999999999999999999999999999999,
+ 99999999999999999999999999999999,
+ 999999999999999999999999999999999,
+ 9999999999999999999999999999999999,
+ 99999999999999999999999999999999999,
+ 999999999999999999999999999999999999,
+ 9999999999999999999999999999999999999,
+ 170141183460469231731687303715884105727,
+];
+
+/// `MIN_DECIMAL_FOR_EACH_PRECISION[p - 1]` holds the minimum `i128` value
+/// that can be stored in a [DataType::Decimal] value of precision `p`
+pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
+ -9,
+ -99,
+ -999,
+ -9999,
+ -99999,
+ -999999,
+ -9999999,
+ -99999999,
+ -999999999,
+ -9999999999,
+ -99999999999,
+ -999999999999,
+ -9999999999999,
+ -99999999999999,
+ -999999999999999,
+ -9999999999999999,
+ -99999999999999999,
+ -999999999999999999,
+ -9999999999999999999,
+ -99999999999999999999,
+ -999999999999999999999,
+ -9999999999999999999999,
+ -99999999999999999999999,
+ -999999999999999999999999,
+ -9999999999999999999999999,
+ -99999999999999999999999999,
+ -999999999999999999999999999,
+ -9999999999999999999999999999,
+ -99999999999999999999999999999,
+ -999999999999999999999999999999,
+ -9999999999999999999999999999999,
+ -99999999999999999999999999999999,
+ -999999999999999999999999999999999,
+ -9999999999999999999999999999999999,
+ -99999999999999999999999999999999999,
+ -999999999999999999999999999999999999,
+ -9999999999999999999999999999999999999,
+ -170141183460469231731687303715884105728,
+];
+
+/// The maximum precision for [DataType::Decimal] values
+pub const DECIMAL_MAX_PRECISION: usize = 38;
+
+/// The maximum scale for [DataType::Decimal] values
+pub const DECIMAL_MAX_SCALE: usize = 38;
+
+/// The default scale for [DataType::Decimal] values
+pub const DECIMAL_DEFAULT_SCALE: usize = 10;
+
+/// Validates that the specified `i128` value can be properly
+/// interpreted as a Decimal number with precision `precision`
+#[inline]
+pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Result<i128> {
+ let max = MAX_DECIMAL_FOR_EACH_PRECISION[precision - 1];
+ let min = MIN_DECIMAL_FOR_EACH_PRECISION[precision - 1];
+
+ if value > max {
+ Err(ArrowError::InvalidArgumentError(format!(
+ "{} is too large to store in a Decimal of precision {}. Max is {}",
+ value, precision, max
+ )))
+ } else if value < min {
+ Err(ArrowError::InvalidArgumentError(format!(
+ "{} is too small to store in a Decimal of precision {}. Min is {}",
+ value, precision, min
+ )))
+ } else {
+ Ok(value)
+ }
+}
+
impl DataType {
/// Parse a data type from a JSON representation.
pub(crate) fn from(json: &Value) -> Result<DataType> {