You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2022/11/08 08:58:39 UTC
[arrow-rs] branch master updated: Cast decimal256 to signed integer (#3040)

This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new a950b52ec Cast decimal256 to signed integer (#3040)
a950b52ec is described below

commit a950b52ec83e5ac14e147f9605f871ba6bd06ee0
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Tue Nov 8 00:58:34 2022 -0800

    Cast decimal256 to signed integer (#3040)
    
    * Cast decimal256 to signed integer
    
    * Use ToPrimitive
    
    * Add CastOptions
---
 arrow-buffer/src/bigint.rs |  87 ++++++++++++++++--
 arrow-cast/src/cast.rs     | 216 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 261 insertions(+), 42 deletions(-)

diff --git a/arrow-buffer/src/bigint.rs b/arrow-buffer/src/bigint.rs
index 8dd57d2c4..be02c2857 100644
--- a/arrow-buffer/src/bigint.rs
+++ b/arrow-buffer/src/bigint.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use num::cast::AsPrimitive;
-use num::{BigInt, FromPrimitive};
+use num::{BigInt, FromPrimitive, ToPrimitive};
 use std::cmp::Ordering;
 
 /// A signed 256-bit integer
@@ -388,13 +388,15 @@ impl i256 {
 
 /// Temporary workaround due to lack of stable const array slicing
 /// See <https://github.com/rust-lang/rust/issues/90091>
-const fn split_array(vals: [u8; 32]) -> ([u8; 16], [u8; 16]) {
-    let mut a = [0; 16];
-    let mut b = [0; 16];
+const fn split_array<const N: usize, const M: usize>(
+    vals: [u8; N],
+) -> ([u8; M], [u8; M]) {
+    let mut a = [0; M];
+    let mut b = [0; M];
     let mut i = 0;
-    while i != 16 {
+    while i != M {
         a[i] = vals[i];
-        b[i] = vals[i + 16];
+        b[i] = vals[i + M];
         i += 1;
     }
     (a, b)
@@ -478,6 +480,44 @@ define_as_primitive!(i16);
 define_as_primitive!(i32);
 define_as_primitive!(i64);
 
+impl ToPrimitive for i256 {
+    fn to_i64(&self) -> Option<i64> {
+        let as_i128 = self.low as i128;
+        // Step 1: the value fits in 128 bits only if `self.high` is the sign extension of `low`.
+        let high_negative = self.high < 0;
+        let low_negative = as_i128 < 0;
+        let high_valid = self.high == -1 || self.high == 0;
+
+        if high_negative == low_negative && high_valid {
+            let (low_bytes, high_bytes) = split_array(u128::to_le_bytes(self.low));
+            let high = i64::from_le_bytes(high_bytes);
+            let low = i64::from_le_bytes(low_bytes);
+            // Step 2: repeat on the 64-bit halves of `low`; test local `high`, not `self.high`.
+            let high_negative = high < 0;
+            let low_negative = low < 0;
+            let high_valid = high == -1 || high == 0;
+
+            (high_negative == low_negative && high_valid).then_some(low)
+        } else {
+            None
+        }
+    }
+
+    fn to_u64(&self) -> Option<u64> {
+        let as_i128 = self.low as i128;
+        // Same 128-bit fit test as step 1 of `to_i64`: `self.high` must sign-extend `low`.
+        let high_negative = self.high < 0;
+        let low_negative = as_i128 < 0;
+        let high_valid = self.high == -1 || self.high == 0;
+        // Negative values pass this test but have the top bit of `low` set, so `to_u64` is `None`.
+        if high_negative == low_negative && high_valid {
+            self.low.to_u64()
+        } else {
+            None
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -676,4 +716,39 @@ mod tests {
             test_ops(i256::from_le_bytes(l), i256::from_le_bytes(r))
         }
     }
+
+    #[test]
+    fn test_i256_to_primitive() {
+        let a = i256::MAX;
+        assert!(a.to_i64().is_none());
+        assert!(a.to_u64().is_none());
+
+        let a = i256::from_i128(i128::MAX);
+        assert!(a.to_i64().is_none());
+        assert!(a.to_u64().is_none());
+
+        let a = i256::from_i128(i64::MAX as i128);
+        assert_eq!(a.to_i64().unwrap(), i64::MAX);
+        assert_eq!(a.to_u64().unwrap(), i64::MAX as u64);
+
+        let a = i256::from_i128(i64::MAX as i128 + 1);
+        assert!(a.to_i64().is_none());
+        assert_eq!(a.to_u64().unwrap(), i64::MAX as u64 + 1);
+
+        let a = i256::MIN;
+        assert!(a.to_i64().is_none());
+        assert!(a.to_u64().is_none());
+
+        let a = i256::from_i128(i128::MIN);
+        assert!(a.to_i64().is_none());
+        assert!(a.to_u64().is_none());
+
+        let a = i256::from_i128(i64::MIN as i128);
+        assert_eq!(a.to_i64().unwrap(), i64::MIN);
+        assert!(a.to_u64().is_none());
+
+        let a = i256::from_i128(i64::MIN as i128 - 1);
+        assert!(a.to_i64().is_none());
+        assert!(a.to_u64().is_none());
+    }
 }
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index e394426bd..1cc814730 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -81,7 +81,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
         (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, Decimal128(_, _)) |
         (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, Decimal256(_, _)) |
         // decimal to signed numeric
-        (Decimal128(_, _), Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64)
+        (Decimal128(_, _), Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64) |
+        (Decimal256(_, _), Null | Int8 | Int16 | Int32 | Int64 )
         | (
             Null,
             Boolean
@@ -433,34 +434,65 @@ fn cast_reinterpret_arrays<
     ))
 }
 
-// cast the decimal array to integer array
-macro_rules! cast_decimal_to_integer {
-    ($ARRAY:expr, $SCALE : ident, $VALUE_BUILDER: ident, $NATIVE_TYPE : ident, $DATA_TYPE : expr) => {{
-        let array = $ARRAY.as_any().downcast_ref::<Decimal128Array>().unwrap();
-        let mut value_builder = $VALUE_BUILDER::with_capacity(array.len());
-        let div: i128 = 10_i128.pow(*$SCALE as u32);
-        let min_bound = ($NATIVE_TYPE::MIN) as i128;
-        let max_bound = ($NATIVE_TYPE::MAX) as i128;
+fn cast_decimal_to_integer<D, T>(
+    array: &ArrayRef,
+    base: D::Native,
+    scale: u8,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError>
+where
+    T: ArrowPrimitiveType,
+    <T as ArrowPrimitiveType>::Native: NumCast,
+    D: DecimalType + ArrowPrimitiveType,
+    <D as ArrowPrimitiveType>::Native: ArrowNativeTypeOp + ToPrimitive,
+{
+    let array = array.as_any().downcast_ref::<PrimitiveArray<D>>().unwrap();
+
+    let div: D::Native = base.pow_checked(scale as u32).map_err(|_| {
+        ArrowError::CastError(format!(
+            "Cannot cast to {:?}. The scale {} causes overflow.",
+            D::PREFIX,
+            scale,
+        ))
+    })?;
+
+    let mut value_builder = PrimitiveBuilder::<T>::with_capacity(array.len());
+
+    if cast_options.safe {
         for i in 0..array.len() {
             if array.is_null(i) {
                 value_builder.append_null();
             } else {
-                let v = array.value(i) / div;
-                // check the overflow
-                // For example: Decimal(128,10,0) as i8
-                // 128 is out of range i8
-                if v <= max_bound && v >= min_bound {
-                    value_builder.append_value(v as $NATIVE_TYPE);
-                } else {
-                    return Err(ArrowError::CastError(format!(
-                        "value of {} is out of range {}",
-                        v, $DATA_TYPE
-                    )));
-                }
+                let v = array
+                    .value(i)
+                    .div_checked(div)
+                    .ok()
+                    .and_then(<T::Native as NumCast>::from::<D::Native>);
+
+                value_builder.append_option(v);
             }
         }
-        Ok(Arc::new(value_builder.finish()))
-    }};
+    } else {
+        for i in 0..array.len() {
+            if array.is_null(i) {
+                value_builder.append_null();
+            } else {
+                let v = array.value(i).div_checked(div)?;
+
+                let value =
+                    <T::Native as NumCast>::from::<D::Native>(v).ok_or_else(|| {
+                        ArrowError::CastError(format!(
+                            "value of {:?} is out of range {}",
+                            v,
+                            T::DATA_TYPE
+                        ))
+                    })?;
+
+                value_builder.append_value(value);
+            }
+        }
+    }
+    Ok(Arc::new(value_builder.finish()))
 }
 
 // cast the decimal array to floating-point array
@@ -576,18 +608,30 @@ pub fn cast_with_options(
         (Decimal128(_, scale), _) => {
             // cast decimal to other type
             match to_type {
-                Int8 => {
-                    cast_decimal_to_integer!(array, scale, Int8Builder, i8, Int8)
-                }
-                Int16 => {
-                    cast_decimal_to_integer!(array, scale, Int16Builder, i16, Int16)
-                }
-                Int32 => {
-                    cast_decimal_to_integer!(array, scale, Int32Builder, i32, Int32)
-                }
-                Int64 => {
-                    cast_decimal_to_integer!(array, scale, Int64Builder, i64, Int64)
-                }
+                Int8 => cast_decimal_to_integer::<Decimal128Type, Int8Type>(
+                    array,
+                    10_i128,
+                    *scale,
+                    cast_options,
+                ),
+                Int16 => cast_decimal_to_integer::<Decimal128Type, Int16Type>(
+                    array,
+                    10_i128,
+                    *scale,
+                    cast_options,
+                ),
+                Int32 => cast_decimal_to_integer::<Decimal128Type, Int32Type>(
+                    array,
+                    10_i128,
+                    *scale,
+                    cast_options,
+                ),
+                Int64 => cast_decimal_to_integer::<Decimal128Type, Int64Type>(
+                    array,
+                    10_i128,
+                    *scale,
+                    cast_options,
+                ),
                 Float32 => {
                     cast_decimal_to_float!(array, scale, Float32Builder, f32)
                 }
@@ -601,6 +645,40 @@ pub fn cast_with_options(
                 ))),
             }
         }
+        (Decimal256(_, scale), _) => {
+            // cast decimal to other type
+            match to_type {
+                Int8 => cast_decimal_to_integer::<Decimal256Type, Int8Type>(
+                    array,
+                    i256::from_i128(10_i128),
+                    *scale,
+                    cast_options,
+                ),
+                Int16 => cast_decimal_to_integer::<Decimal256Type, Int16Type>(
+                    array,
+                    i256::from_i128(10_i128),
+                    *scale,
+                    cast_options,
+                ),
+                Int32 => cast_decimal_to_integer::<Decimal256Type, Int32Type>(
+                    array,
+                    i256::from_i128(10_i128),
+                    *scale,
+                    cast_options,
+                ),
+                Int64 => cast_decimal_to_integer::<Decimal256Type, Int64Type>(
+                    array,
+                    i256::from_i128(10_i128),
+                    *scale,
+                    cast_options,
+                ),
+                Null => Ok(new_null_array(to_type, array.len())),
+                _ => Err(ArrowError::CastError(format!(
+                    "Casting from {:?} to {:?} not supported",
+                    from_type, to_type
+                ))),
+            }
+        }
         (_, Decimal128(precision, scale)) => {
             // cast data to decimal
             match from_type {
@@ -3154,12 +3232,18 @@ mod tests {
         let value_array: Vec<Option<i128>> = vec![Some(24400)];
         let decimal_array = create_decimal_array(value_array, 38, 2).unwrap();
         let array = Arc::new(decimal_array) as ArrayRef;
-        let casted_array = cast(&array, &DataType::Int8);
+        let casted_array =
+            cast_with_options(&array, &DataType::Int8, &CastOptions { safe: false });
         assert_eq!(
             "Cast error: value of 244 is out of range Int8".to_string(),
             casted_array.unwrap_err().to_string()
         );
 
+        let casted_array =
+            cast_with_options(&array, &DataType::Int8, &CastOptions { safe: true });
+        assert!(casted_array.is_ok());
+        assert!(casted_array.unwrap().is_null(0));
+
         // loss the precision: convert decimal to f32、f64
         // f32
         // 112345678_f32 and 112345679_f32 are same, so the 112345679_f32 will lose precision.
@@ -3218,6 +3302,66 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_cast_decimal256_to_numeric() {
+        let decimal_type = DataType::Decimal256(38, 2);
+        // negative test
+        assert!(!can_cast_types(&decimal_type, &DataType::UInt8));
+        let value_array: Vec<Option<i256>> = vec![
+            Some(i256::from_i128(125)),
+            Some(i256::from_i128(225)),
+            Some(i256::from_i128(325)),
+            None,
+            Some(i256::from_i128(525)),
+        ];
+        let decimal_array = create_decimal256_array(value_array, 38, 2).unwrap();
+        let array = Arc::new(decimal_array) as ArrayRef;
+        // i8
+        generate_cast_test_case!(
+            &array,
+            Int8Array,
+            &DataType::Int8,
+            vec![Some(1_i8), Some(2_i8), Some(3_i8), None, Some(5_i8)]
+        );
+        // i16
+        generate_cast_test_case!(
+            &array,
+            Int16Array,
+            &DataType::Int16,
+            vec![Some(1_i16), Some(2_i16), Some(3_i16), None, Some(5_i16)]
+        );
+        // i32
+        generate_cast_test_case!(
+            &array,
+            Int32Array,
+            &DataType::Int32,
+            vec![Some(1_i32), Some(2_i32), Some(3_i32), None, Some(5_i32)]
+        );
+        // i64
+        generate_cast_test_case!(
+            &array,
+            Int64Array,
+            &DataType::Int64,
+            vec![Some(1_i64), Some(2_i64), Some(3_i64), None, Some(5_i64)]
+        );
+
+        // overflow test: out of range of max i8
+        let value_array: Vec<Option<i256>> = vec![Some(i256::from_i128(24400))];
+        let decimal_array = create_decimal256_array(value_array, 38, 2).unwrap();
+        let array = Arc::new(decimal_array) as ArrayRef;
+        let casted_array =
+            cast_with_options(&array, &DataType::Int8, &CastOptions { safe: false });
+        assert_eq!(
+            "Cast error: value of 244 is out of range Int8".to_string(),
+            casted_array.unwrap_err().to_string()
+        );
+
+        let casted_array =
+            cast_with_options(&array, &DataType::Int8, &CastOptions { safe: true });
+        assert!(casted_array.is_ok());
+        assert!(casted_array.unwrap().is_null(0));
+    }
+
     #[test]
     #[cfg(not(feature = "force_validate"))]
     fn test_cast_numeric_to_decimal128() {