You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/02/08 20:10:49 UTC

[arrow-rs] branch master updated: `DecimalArray` API ergonomics: add iter(), create from iter(), change precision / scale (#1223)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 35e16be  `DecimalArray` API ergonomics: add iter(), create from iter(), change precision / scale (#1223)
35e16be is described below

commit 35e16be01e680e9381b2d1393c2e3f8e7acb7b13
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Tue Feb 8 15:10:14 2022 -0500

    `DecimalArray` API ergonomics: add iter(), create from iter(), change precision / scale (#1223)
    
    * DecimalArray: create from iter, iter(), docs
    
    * Add with_precision and scale
    
    * Implement iter() and into_iter() for DecimalArray
    
    * Clean up and tests
    
    * Return Result rather than panic
    
    * Refactor error handling into separate function
    
    * Validate data in `with_precision_and_scale`
    
    * Use named constant values
    
    * clippy
---
 arrow/src/array/array_binary.rs   | 307 ++++++++++++++++++++++++++++++++++++--
 arrow/src/array/builder.rs        |  90 +----------
 arrow/src/array/data.rs           |  21 +++
 arrow/src/array/mod.rs            |   2 -
 arrow/src/compute/kernels/cast.rs |   5 +-
 arrow/src/csv/reader.rs           |  23 ++-
 arrow/src/datatypes/datatype.rs   | 124 ++++++++++++++-
 7 files changed, 452 insertions(+), 120 deletions(-)

diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs
index 46c7066..7496e24 100644
--- a/arrow/src/array/array_binary.rs
+++ b/arrow/src/array/array_binary.rs
@@ -15,16 +15,22 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use std::borrow::Borrow;
 use std::convert::{From, TryInto};
 use std::fmt;
 use std::{any::Any, iter::FromIterator};
 
+use super::BooleanBufferBuilder;
 use super::{
     array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData,
     FixedSizeListArray, GenericBinaryIter, GenericListArray, OffsetSizeTrait,
 };
 use crate::buffer::Buffer;
-use crate::error::ArrowError;
+use crate::datatypes::{
+    validate_decimal_precision, DECIMAL_DEFAULT_SCALE, DECIMAL_MAX_PRECISION,
+    DECIMAL_MAX_SCALE,
+};
+use crate::error::{ArrowError, Result};
 use crate::util::bit_util;
 use crate::{buffer::MutableBuffer, datatypes::DataType};
 
@@ -491,7 +497,7 @@ impl FixedSizeBinaryArray {
     /// # Errors
     ///
     /// Returns error if argument has length zero, or sizes of nested slices don't match.
-    pub fn try_from_sparse_iter<T, U>(mut iter: T) -> Result<Self, ArrowError>
+    pub fn try_from_sparse_iter<T, U>(mut iter: T) -> Result<Self>
     where
         T: Iterator<Item = Option<U>>,
         U: AsRef<[u8]>,
@@ -502,7 +508,7 @@ impl FixedSizeBinaryArray {
         let mut null_buf = MutableBuffer::from_len_zeroed(0);
         let mut buffer = MutableBuffer::from_len_zeroed(0);
         let mut prepend = 0;
-        iter.try_for_each(|item| -> Result<(), ArrowError> {
+        iter.try_for_each(|item| -> Result<()> {
             // extend null bitmask by one byte per each 8 items
             if byte == 0 {
                 null_buf.push(0u8);
@@ -575,7 +581,7 @@ impl FixedSizeBinaryArray {
     /// # Errors
     ///
     /// Returns error if argument has length zero, or sizes of nested slices don't match.
-    pub fn try_from_iter<T, U>(mut iter: T) -> Result<Self, ArrowError>
+    pub fn try_from_iter<T, U>(mut iter: T) -> Result<Self>
     where
         T: Iterator<Item = U>,
         U: AsRef<[u8]>,
@@ -583,7 +589,7 @@ impl FixedSizeBinaryArray {
         let mut len = 0;
         let mut size = None;
         let mut buffer = MutableBuffer::from_len_zeroed(0);
-        iter.try_for_each(|item| -> Result<(), ArrowError> {
+        iter.try_for_each(|item| -> Result<()> {
             let slice = item.as_ref();
             if let Some(size) = size {
                 if size != slice.len() {
@@ -690,19 +696,28 @@ impl Array for FixedSizeBinaryArray {
     }
 }
 
-/// A type of `DecimalArray` whose elements are binaries.
+/// `DecimalArray` stores fixed width decimal numbers,
+/// with a fixed precision and scale.
 ///
 /// # Examples
 ///
 /// ```
 ///    use arrow::array::{Array, DecimalArray, DecimalBuilder};
 ///    use arrow::datatypes::DataType;
-///    let mut builder = DecimalBuilder::new(30, 23, 6);
 ///
-///    builder.append_value(8_887_000_000).unwrap();
-///    builder.append_null().unwrap();
-///    builder.append_value(-8_887_000_000).unwrap();
-///    let decimal_array: DecimalArray = builder.finish();
+///    // Create a DecimalArray with the default precision and scale
+///    let decimal_array: DecimalArray = vec![
+///       Some(8_887_000_000),
+///       None,
+///       Some(-8_887_000_000),
+///     ]
+///     .into_iter().collect();
+///
+///    // set precision and scale so values are interpreted
+///    // as `8887.000000`, `Null`, and `-8887.000000`
+///    let decimal_array = decimal_array
+///     .with_precision_and_scale(23, 6)
+///     .unwrap();
 ///
 ///    assert_eq!(&DataType::Decimal(23, 6), decimal_array.data_type());
 ///    assert_eq!(8_887_000_000, decimal_array.value(0));
@@ -816,13 +831,93 @@ impl DecimalArray {
         let array_data = unsafe { builder.build_unchecked() };
         Self::from(array_data)
     }
+
+    /// Creates a [DecimalArray] with default precision and scale,
+    /// based on an iterator of `i128` values without nulls
+    pub fn from_iter_values<I: IntoIterator<Item = i128>>(iter: I) -> Self {
+        let val_buf: Buffer = iter.into_iter().collect();
+        let data = unsafe {
+            ArrayData::new_unchecked(
+                Self::default_type(),
+                val_buf.len() / std::mem::size_of::<i128>(),
+                None,
+                None,
+                0,
+                vec![val_buf],
+                vec![],
+            )
+        };
+        DecimalArray::from(data)
+    }
+
+    /// Return the precision (total digits) that can be stored by this array
     pub fn precision(&self) -> usize {
         self.precision
     }
 
+    /// Return the scale (digits after the decimal) that can be stored by this array
     pub fn scale(&self) -> usize {
         self.scale
     }
+
+    /// Returns a DecimalArray with the same data as self, with the
+    /// specified precision and scale.
+    ///
+    /// Returns an Error if:
+    /// 1. `precision` is larger than [`DECIMAL_MAX_PRECISION`]
+    /// 2. `scale` is larger than [`DECIMAL_MAX_SCALE`]
+    /// 3. `scale` is > `precision`, or a value does not fit in a reduced `precision`
+    pub fn with_precision_and_scale(
+        mut self,
+        precision: usize,
+        scale: usize,
+    ) -> Result<Self> {
+        if precision > DECIMAL_MAX_PRECISION {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "precision {} is greater than max {}",
+                precision, DECIMAL_MAX_PRECISION
+            )));
+        }
+        if scale > DECIMAL_MAX_SCALE {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "scale {} is greater than max {}",
+                scale, DECIMAL_MAX_SCALE
+            )));
+        }
+        if scale > precision {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "scale {} is greater than precision {}",
+                scale, precision
+            )));
+        }
+
+        // Ensure that all values are within the requested
+        // precision. For performance, only check if the precision is
+        // decreased
+        if precision < self.precision {
+            for v in self.iter().flatten() {
+                validate_decimal_precision(v, precision)?;
+            }
+        }
+
+        assert_eq!(
+            self.data.data_type(),
+            &DataType::Decimal(self.precision, self.scale)
+        );
+
+        // safety: self.data is valid DataType::Decimal as checked above
+        let new_data_type = DataType::Decimal(precision, scale);
+        self.precision = precision;
+        self.scale = scale;
+        self.data = self.data.with_data_type(new_data_type);
+        Ok(self)
+    }
+
+    /// The default precision and scale used when not specified.
+    pub fn default_type() -> DataType {
+        // Keep maximum precision
+        DataType::Decimal(DECIMAL_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
+    }
 }
 
 impl From<ArrayData> for DecimalArray {
@@ -848,6 +943,97 @@ impl From<ArrayData> for DecimalArray {
     }
 }
 
+/// an iterator that returns Some(i128) or None, that can be used on a
+/// DecimalArray
+#[derive(Debug)]
+pub struct DecimalIter<'a> {
+    array: &'a DecimalArray,
+    current: usize,
+    current_end: usize,
+}
+
+impl<'a> DecimalIter<'a> {
+    pub fn new(array: &'a DecimalArray) -> Self {
+        Self {
+            array,
+            current: 0,
+            current_end: array.len(),
+        }
+    }
+}
+
+impl<'a> std::iter::Iterator for DecimalIter<'a> {
+    type Item = Option<i128>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.current == self.current_end {
+            None
+        } else {
+            let old = self.current;
+            self.current += 1;
+            // TODO: Improve performance by avoiding bounds check here
+            // (by adding a `value_unchecked` method, for example)
+            if self.array.is_null(old) {
+                Some(None)
+            } else {
+                Some(Some(self.array.value(old)))
+            }
+        }
+    }
+}
+
+impl<'a> IntoIterator for &'a DecimalArray {
+    type Item = Option<i128>;
+    type IntoIter = DecimalIter<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        DecimalIter::<'a>::new(self)
+    }
+}
+
+impl<'a> DecimalArray {
+    /// constructs a new iterator
+    pub fn iter(&'a self) -> DecimalIter<'a> {
+        DecimalIter::new(self)
+    }
+}
+
+impl<Ptr: Borrow<Option<i128>>> FromIterator<Ptr> for DecimalArray {
+    fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
+        let iter = iter.into_iter();
+        let (lower, upper) = iter.size_hint();
+        let size_hint = upper.unwrap_or(lower);
+
+        let mut null_buf = BooleanBufferBuilder::new(size_hint);
+
+        let buffer: Buffer = iter
+            .map(|item| {
+                if let Some(a) = item.borrow() {
+                    null_buf.append(true);
+                    *a
+                } else {
+                    null_buf.append(false);
+                    // arbitrary value for NULL
+                    0
+                }
+            })
+            .collect();
+
+        let data = unsafe {
+            ArrayData::new_unchecked(
+                Self::default_type(),
+                null_buf.len(),
+                None,
+                Some(null_buf.into()),
+                0,
+                vec![buffer],
+                vec![],
+            )
+        };
+        DecimalArray::from(data)
+    }
+}
+
 impl fmt::Debug for DecimalArray {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "DecimalArray<{}, {}>\n[\n", self.precision, self.scale)?;
@@ -1316,7 +1502,7 @@ mod tests {
         let mut result = decimal_builder.append_value(123456);
         let mut error = result.unwrap_err();
         assert_eq!(
-            "Invalid argument error: The value of 123456 i128 is not compatible with Decimal(5,3)",
+            "Invalid argument error: 123456 is too large to store in a Decimal of precision 5. Max is 99999",
             error.to_string()
         );
         decimal_builder.append_value(12345).unwrap();
@@ -1327,14 +1513,14 @@ mod tests {
         result = decimal_builder.append_value(100);
         error = result.unwrap_err();
         assert_eq!(
-            "Invalid argument error: The value of 100 i128 is not compatible with Decimal(2,1)",
+            "Invalid argument error: 100 is too large to store in a Decimal of precision 2. Max is 99",
             error.to_string()
         );
         decimal_builder.append_value(99).unwrap();
         result = decimal_builder.append_value(-100);
         error = result.unwrap_err();
         assert_eq!(
-            "Invalid argument error: The value of -100 i128 is not compatible with Decimal(2,1)",
+            "Invalid argument error: -100 is too small to store in a Decimal of precision 2. Min is -99",
             error.to_string()
         );
         decimal_builder.append_value(-99).unwrap();
@@ -1342,6 +1528,48 @@ mod tests {
         assert_eq!("9.9", arr.value_as_string(0));
         assert_eq!("-9.9", arr.value_as_string(1));
     }
+    #[test]
+    fn test_decimal_from_iter_values() {
+        let array = DecimalArray::from_iter_values(vec![-100, 0, 101].into_iter());
+        assert_eq!(array.len(), 3);
+        assert_eq!(array.data_type(), &DataType::Decimal(38, 10));
+        assert_eq!(-100, array.value(0));
+        assert!(!array.is_null(0));
+        assert_eq!(0, array.value(1));
+        assert!(!array.is_null(1));
+        assert_eq!(101, array.value(2));
+        assert!(!array.is_null(2));
+    }
+
+    #[test]
+    fn test_decimal_from_iter() {
+        let array: DecimalArray = vec![Some(-100), None, Some(101)].into_iter().collect();
+        assert_eq!(array.len(), 3);
+        assert_eq!(array.data_type(), &DataType::Decimal(38, 10));
+        assert_eq!(-100, array.value(0));
+        assert!(!array.is_null(0));
+        assert!(array.is_null(1));
+        assert_eq!(101, array.value(2));
+        assert!(!array.is_null(2));
+    }
+
+    #[test]
+    fn test_decimal_iter() {
+        let data = vec![Some(-100), None, Some(101)];
+        let array: DecimalArray = data.clone().into_iter().collect();
+
+        let collected: Vec<_> = array.iter().collect();
+        assert_eq!(data, collected);
+    }
+
+    #[test]
+    fn test_decimal_into_iter() {
+        let data = vec![Some(-100), None, Some(101)];
+        let array: DecimalArray = data.clone().into_iter().collect();
+
+        let collected: Vec<_> = array.iter().collect();
+        assert_eq!(data, collected);
+    }
 
     #[test]
     fn test_decimal_array_value_as_string() {
@@ -1361,6 +1589,57 @@ mod tests {
     }
 
     #[test]
+    fn test_decimal_array_with_precision_and_scale() {
+        let arr = DecimalArray::from_iter_values([12345, 456, 7890, -123223423432432])
+            .with_precision_and_scale(20, 2)
+            .unwrap();
+
+        assert_eq!(arr.data_type(), &DataType::Decimal(20, 2));
+        assert_eq!(arr.precision(), 20);
+        assert_eq!(arr.scale(), 2);
+
+        let actual: Vec<_> = (0..arr.len()).map(|i| arr.value_as_string(i)).collect();
+        let expected = vec!["123.45", "4.56", "78.90", "-1232234234324.32"];
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    #[should_panic(
+        expected = "-123223423432432 is too small to store in a Decimal of precision 5. Min is -99999"
+    )]
+    fn test_decimal_array_with_precision_and_scale_out_of_range() {
+        DecimalArray::from_iter_values([12345, 456, 7890, -123223423432432])
+            // precision is too small to hold value
+            .with_precision_and_scale(5, 2)
+            .unwrap();
+    }
+
+    #[test]
+    #[should_panic(expected = "precision 40 is greater than max 38")]
+    fn test_decimal_array_with_precision_and_scale_invalid_precision() {
+        DecimalArray::from_iter_values([12345, 456])
+            .with_precision_and_scale(40, 2)
+            .unwrap();
+    }
+
+    #[test]
+    #[should_panic(expected = "scale 40 is greater than max 38")]
+    fn test_decimal_array_with_precision_and_scale_invalid_scale() {
+        DecimalArray::from_iter_values([12345, 456])
+            .with_precision_and_scale(20, 40)
+            .unwrap();
+    }
+
+    #[test]
+    #[should_panic(expected = "scale 10 is greater than precision 4")]
+    fn test_decimal_array_with_precision_and_scale_invalid_precision_and_scale() {
+        DecimalArray::from_iter_values([12345, 456])
+            .with_precision_and_scale(4, 10)
+            .unwrap();
+    }
+
+    #[test]
     fn test_decimal_array_fmt_debug() {
         let values: Vec<i128> = vec![8887000000, -8887000000];
         let mut decimal_builder = DecimalBuilder::new(3, 23, 6);
diff --git a/arrow/src/array/builder.rs b/arrow/src/array/builder.rs
index 18deac3..fd94eaa 100644
--- a/arrow/src/array/builder.rs
+++ b/arrow/src/array/builder.rs
@@ -1153,87 +1153,6 @@ pub struct FixedSizeBinaryBuilder {
     builder: FixedSizeListBuilder<UInt8Builder>,
 }
 
-pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
-    9,
-    99,
-    999,
-    9999,
-    99999,
-    999999,
-    9999999,
-    99999999,
-    999999999,
-    9999999999,
-    99999999999,
-    999999999999,
-    9999999999999,
-    99999999999999,
-    999999999999999,
-    9999999999999999,
-    99999999999999999,
-    999999999999999999,
-    9999999999999999999,
-    99999999999999999999,
-    999999999999999999999,
-    9999999999999999999999,
-    99999999999999999999999,
-    999999999999999999999999,
-    9999999999999999999999999,
-    99999999999999999999999999,
-    999999999999999999999999999,
-    9999999999999999999999999999,
-    99999999999999999999999999999,
-    999999999999999999999999999999,
-    9999999999999999999999999999999,
-    99999999999999999999999999999999,
-    999999999999999999999999999999999,
-    9999999999999999999999999999999999,
-    99999999999999999999999999999999999,
-    999999999999999999999999999999999999,
-    9999999999999999999999999999999999999,
-    170141183460469231731687303715884105727,
-];
-pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
-    -9,
-    -99,
-    -999,
-    -9999,
-    -99999,
-    -999999,
-    -9999999,
-    -99999999,
-    -999999999,
-    -9999999999,
-    -99999999999,
-    -999999999999,
-    -9999999999999,
-    -99999999999999,
-    -999999999999999,
-    -9999999999999999,
-    -99999999999999999,
-    -999999999999999999,
-    -9999999999999999999,
-    -99999999999999999999,
-    -999999999999999999999,
-    -9999999999999999999999,
-    -99999999999999999999999,
-    -999999999999999999999999,
-    -9999999999999999999999999,
-    -99999999999999999999999999,
-    -999999999999999999999999999,
-    -9999999999999999999999999999,
-    -99999999999999999999999999999,
-    -999999999999999999999999999999,
-    -9999999999999999999999999999999,
-    -99999999999999999999999999999999,
-    -999999999999999999999999999999999,
-    -9999999999999999999999999999999999,
-    -99999999999999999999999999999999999,
-    -999999999999999999999999999999999999,
-    -9999999999999999999999999999999999999,
-    -170141183460469231731687303715884105728,
-];
-
 ///
 /// Array Builder for [`DecimalArray`]
 ///
@@ -1547,14 +1466,7 @@ impl DecimalBuilder {
     /// distinct array element.
     #[inline]
     pub fn append_value(&mut self, value: i128) -> Result<()> {
-        if value > MAX_DECIMAL_FOR_EACH_PRECISION[self.precision - 1]
-            || value < MIN_DECIMAL_FOR_EACH_PRECISION[self.precision - 1]
-        {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "The value of {} i128 is not compatible with Decimal({},{})",
-                value, self.precision, self.scale
-            )));
-        }
+        let value = validate_decimal_precision(value, self.precision)?;
         let value_as_bytes = Self::from_i128_to_fixed_size_bytes(
             value,
             self.builder.value_length() as usize,
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 53cc633..0169d28 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -357,6 +357,27 @@ impl ArrayData {
         &self.data_type
     }
 
+    /// Updates the [DataType] of this ArrayData.
+    ///
+    /// Panics if the new DataType is not compatible with the
+    /// existing type.
+    ///
+    /// Note: currently only changing a [DataType::Decimal]'s precision
+    /// and scale is supported.
+    #[inline]
+    pub(crate) fn with_data_type(mut self, new_data_type: DataType) -> Self {
+        assert!(
+            matches!(self.data_type, DataType::Decimal(_, _)),
+            "only DecimalType is supported for existing type"
+        );
+        assert!(
+            matches!(new_data_type, DataType::Decimal(_, _)),
+            "only DecimalType is supported for new datatype"
+        );
+        self.data_type = new_data_type;
+        self
+    }
+
     /// Returns a slice of buffers for this array data
     pub fn buffers(&self) -> &[Buffer] {
         &self.buffers[..]
diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs
index c10b380..6929bfd 100644
--- a/arrow/src/array/mod.rs
+++ b/arrow/src/array/mod.rs
@@ -465,8 +465,6 @@ pub use self::builder::StringBuilder;
 pub use self::builder::StringDictionaryBuilder;
 pub use self::builder::StructBuilder;
 pub use self::builder::UnionBuilder;
-pub use self::builder::MAX_DECIMAL_FOR_EACH_PRECISION;
-pub use self::builder::MIN_DECIMAL_FOR_EACH_PRECISION;
 
 pub type Int8Builder = PrimitiveBuilder<Int8Type>;
 pub type Int16Builder = PrimitiveBuilder<Int16Type>;
diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs
index a3b4d6b..e3900aa 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -2116,7 +2116,8 @@ mod tests {
         let array = Arc::new(input_decimal_array) as ArrayRef;
         let result = cast(&array, &DataType::Decimal(2, 2));
         assert!(result.is_err());
-        assert_eq!("Invalid argument error: The value of 12345600 i128 is not compatible with Decimal(2,2)".to_string(), result.unwrap_err().to_string());
+        assert_eq!("Invalid argument error: 12345600 is too large to store in a Decimal of precision 2. Max is 99",
+                   result.unwrap_err().to_string());
     }
 
     #[test]
@@ -2297,7 +2298,7 @@ mod tests {
         let array = Arc::new(array) as ArrayRef;
         let casted_array = cast(&array, &DataType::Decimal(3, 1));
         assert!(casted_array.is_err());
-        assert_eq!("Invalid argument error: The value of 1000 i128 is not compatible with Decimal(3,1)", casted_array.unwrap_err().to_string());
+        assert_eq!("Invalid argument error: 1000 is too large to store in a Decimal of precision 3. Max is 999", casted_array.unwrap_err().to_string());
 
         // test f32 to decimal type
         let array = Float32Array::from(vec![
diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs
index 4d213a2..03a45ab 100644
--- a/arrow/src/csv/reader.rs
+++ b/arrow/src/csv/reader.rs
@@ -51,7 +51,6 @@ use std::sync::Arc;
 
 use crate::array::{
     ArrayRef, BooleanArray, DecimalBuilder, DictionaryArray, PrimitiveArray, StringArray,
-    MAX_DECIMAL_FOR_EACH_PRECISION, MIN_DECIMAL_FOR_EACH_PRECISION,
 };
 use crate::compute::kernels::cast_utils::string_to_timestamp_nanos;
 use crate::datatypes::*;
@@ -900,15 +899,8 @@ fn parse_decimal_with_parameter(s: &str, precision: usize, scale: usize) -> Resu
         if negative {
             result = result.neg();
         }
-        if result > MAX_DECIMAL_FOR_EACH_PRECISION[precision - 1]
-            || result < MIN_DECIMAL_FOR_EACH_PRECISION[precision - 1]
-        {
-            return Err(ArrowError::ParseError(format!(
-                "parse decimal overflow, the precision {}, the scale {}, the value {}",
-                precision, scale, s
-            )));
-        }
-        Ok(result)
+        validate_decimal_precision(result, precision)
+            .map_err(|e| ArrowError::ParseError(format!("parse decimal overflow: {}", e)))
     } else {
         Err(ArrowError::ParseError(format!(
             "can't parse the string value {} to decimal",
@@ -1766,8 +1758,15 @@ mod tests {
         let overflow_parse_tests = ["12345678", "12345678.9", "99999999.99"];
         for s in overflow_parse_tests {
             let result = parse_decimal_with_parameter(s, 10, 3);
-            assert_eq!(format!(
-                "Parser error: parse decimal overflow, the precision {}, the scale {}, the value {}", 10,3, s),result.unwrap_err().to_string());
+            let expected = "Parser error: parse decimal overflow";
+            let actual = result.unwrap_err().to_string();
+
+            assert!(
+                actual.contains(&expected),
+                "actual: '{}', expected: '{}'",
+                actual,
+                expected
+            );
         }
     }
 
diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs
index 3653ebb..e6f5461 100644
--- a/arrow/src/datatypes/datatype.rs
+++ b/arrow/src/datatypes/datatype.rs
@@ -127,7 +127,12 @@ pub enum DataType {
     /// This type mostly used to represent low cardinality string
     /// arrays or a limited set of primitive types as integers.
     Dictionary(Box<DataType>, Box<DataType>),
-    /// Decimal value with precision and scale
+    /// Exact decimal value with precision and scale
+    ///
+    /// * precision is the total number of digits
+    /// * scale is the number of digits past the decimal
+    ///
+    /// For example the number 123.45 has precision 5 and scale 2.
     Decimal(usize, usize),
     /// A Map is a logical nested type that is represented as
     ///
@@ -189,6 +194,123 @@ impl fmt::Display for DataType {
     }
 }
 
+/// `MAX_DECIMAL_FOR_EACH_PRECISION[p - 1]` holds the maximum `i128` value
+/// that can be stored in a [DataType::Decimal] value of precision `p`
+pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
+    9,
+    99,
+    999,
+    9999,
+    99999,
+    999999,
+    9999999,
+    99999999,
+    999999999,
+    9999999999,
+    99999999999,
+    999999999999,
+    9999999999999,
+    99999999999999,
+    999999999999999,
+    9999999999999999,
+    99999999999999999,
+    999999999999999999,
+    9999999999999999999,
+    99999999999999999999,
+    999999999999999999999,
+    9999999999999999999999,
+    99999999999999999999999,
+    999999999999999999999999,
+    9999999999999999999999999,
+    99999999999999999999999999,
+    999999999999999999999999999,
+    9999999999999999999999999999,
+    99999999999999999999999999999,
+    999999999999999999999999999999,
+    9999999999999999999999999999999,
+    99999999999999999999999999999999,
+    999999999999999999999999999999999,
+    9999999999999999999999999999999999,
+    99999999999999999999999999999999999,
+    999999999999999999999999999999999999,
+    9999999999999999999999999999999999999,
+    170141183460469231731687303715884105727,
+];
+
+/// `MIN_DECIMAL_FOR_EACH_PRECISION[p - 1]` holds the minimum `i128` value
+/// that can be stored in a [DataType::Decimal] value of precision `p`
+pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
+    -9,
+    -99,
+    -999,
+    -9999,
+    -99999,
+    -999999,
+    -9999999,
+    -99999999,
+    -999999999,
+    -9999999999,
+    -99999999999,
+    -999999999999,
+    -9999999999999,
+    -99999999999999,
+    -999999999999999,
+    -9999999999999999,
+    -99999999999999999,
+    -999999999999999999,
+    -9999999999999999999,
+    -99999999999999999999,
+    -999999999999999999999,
+    -9999999999999999999999,
+    -99999999999999999999999,
+    -999999999999999999999999,
+    -9999999999999999999999999,
+    -99999999999999999999999999,
+    -999999999999999999999999999,
+    -9999999999999999999999999999,
+    -99999999999999999999999999999,
+    -999999999999999999999999999999,
+    -9999999999999999999999999999999,
+    -99999999999999999999999999999999,
+    -999999999999999999999999999999999,
+    -9999999999999999999999999999999999,
+    -99999999999999999999999999999999999,
+    -999999999999999999999999999999999999,
+    -9999999999999999999999999999999999999,
+    -170141183460469231731687303715884105728,
+];
+
+/// The maximum precision for [DataType::Decimal] values
+pub const DECIMAL_MAX_PRECISION: usize = 38;
+
+/// The maximum scale for [DataType::Decimal] values
+pub const DECIMAL_MAX_SCALE: usize = 38;
+
+/// The default scale for [DataType::Decimal] values
+pub const DECIMAL_DEFAULT_SCALE: usize = 10;
+
+/// Validates that the specified `i128` value can be properly
+/// interpreted as a Decimal number with precision `precision`
+#[inline]
+pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Result<i128> {
+    let max = MAX_DECIMAL_FOR_EACH_PRECISION[precision - 1];
+    let min = MIN_DECIMAL_FOR_EACH_PRECISION[precision - 1];
+
+    if value > max {
+        Err(ArrowError::InvalidArgumentError(format!(
+            "{} is too large to store in a Decimal of precision {}. Max is {}",
+            value, precision, max
+        )))
+    } else if value < min {
+        Err(ArrowError::InvalidArgumentError(format!(
+            "{} is too small to store in a Decimal of precision {}. Min is {}",
+            value, precision, min
+        )))
+    } else {
+        Ok(value)
+    }
+}
+
 impl DataType {
     /// Parse a data type from a JSON representation.
     pub(crate) fn from(json: &Value) -> Result<DataType> {