You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/12/21 15:18:13 UTC

[GitHub] [arrow-rs] alamb commented on a change in pull request #1073: support cast decimal to signed numeric

alamb commented on a change in pull request #1073:
URL: https://github.com/apache/arrow-rs/pull/1073#discussion_r773205855



##########
File path: arrow/src/compute/kernels/cast.rs
##########
@@ -281,6 +285,56 @@ macro_rules! cast_floating_point_to_decimal {
     }};
 }
 
+// cast the decimal array to integer array
+macro_rules! cast_decimal_to_integer {
+    ($ARRAY:expr, $SCALE : ident, $VALUE_BUILDER: ident, $NATIVE_TYPE : ident, $DATA_TYPE : expr) => {{
+        let array = $ARRAY.as_any().downcast_ref::<DecimalArray>().unwrap();
+        let mut value_builder = $VALUE_BUILDER::new(array.len());
+        let div: i128 = 10_i128.pow(*$SCALE as u32);
+        let min_bound = ($NATIVE_TYPE::MIN) as i128;
+        let max_bound = ($NATIVE_TYPE::MAX) as i128;
+        for i in 0..array.len() {
+            if array.is_null(i) {
+                value_builder.append_null()?;
+            } else {
+                let v = array.value(i) / div;
+                // check the overflow
+                // For example: Decimal(128,10,0) as i8
+                // 128 is out of range i8
+                if v <= max_bound && v >= min_bound {

Review comment:
       👍 

##########
File path: arrow/src/compute/kernels/cast.rs
##########
@@ -281,6 +285,56 @@ macro_rules! cast_floating_point_to_decimal {
     }};
 }
 
+// cast the decimal array to integer array
+macro_rules! cast_decimal_to_integer {
+    ($ARRAY:expr, $SCALE : ident, $VALUE_BUILDER: ident, $NATIVE_TYPE : ident, $DATA_TYPE : expr) => {{
+        let array = $ARRAY.as_any().downcast_ref::<DecimalArray>().unwrap();
+        let mut value_builder = $VALUE_BUILDER::new(array.len());
+        let div: i128 = 10_i128.pow(*$SCALE as u32);
+        let min_bound = ($NATIVE_TYPE::MIN) as i128;
+        let max_bound = ($NATIVE_TYPE::MAX) as i128;
+        for i in 0..array.len() {

Review comment:
       FWIW as a performance optimization in the future, we might be able to avoid the use of a `builder` and create the arrays directly
   
   So something like 
   
   let new_array: Int8Array = array.iter()
     .map(|v| v.map(|v| {
       let v =  v / div;
       if v <= max_bound && v >= min_bound {
         ...
       }
     }).collect()?;
   ```
   
   Or something

##########
File path: arrow/src/compute/kernels/cast.rs
##########
@@ -1906,26 +1987,193 @@ where
 mod tests {
     use super::*;
     use crate::{buffer::Buffer, util::display::array_value_to_string};
-    use num::traits::Pow;
+
+    macro_rules! generate_cast_test_case {
+        ($INPUT_ARRAY: expr, $INPUT_ARRAY_TYPE: expr, $OUTPUT_TYPE_ARRAY: ident, $OUTPUT_TYPE: expr, $OUTPUT_VALUES: expr) => {

Review comment:
       I think you could avoid having to pass in `$INPUT_ARRAY_TYPE` by using the `data_type()` function
   
   like
   ```rust
   let input_array_type = $INPUT_ARRAY.data_type()
   ```
   

##########
File path: arrow/src/compute/kernels/cast.rs
##########
@@ -1906,26 +1987,193 @@ where
 mod tests {
     use super::*;
     use crate::{buffer::Buffer, util::display::array_value_to_string};
-    use num::traits::Pow;
+
+    macro_rules! generate_cast_test_case {
+        ($INPUT_ARRAY: expr, $INPUT_ARRAY_TYPE: expr, $OUTPUT_TYPE_ARRAY: ident, $OUTPUT_TYPE: expr, $OUTPUT_VALUES: expr) => {
+            // assert cast type
+            assert!(can_cast_types($INPUT_ARRAY_TYPE, $OUTPUT_TYPE));
+            let casted_array = cast($INPUT_ARRAY, $OUTPUT_TYPE).unwrap();
+            let result_array = casted_array
+                .as_any()
+                .downcast_ref::<$OUTPUT_TYPE_ARRAY>()
+                .unwrap();
+            assert_eq!($OUTPUT_TYPE, result_array.data_type());
+            assert_eq!(result_array.len(), $OUTPUT_VALUES.len());
+            for (i, x) in $OUTPUT_VALUES.iter().enumerate() {
+                match x {
+                    Some(x) => {
+                        assert_eq!(result_array.value(i), *x);
+                    }
+                    None => {
+                        assert!(result_array.is_null(i));
+                    }
+                }
+            }
+        };
+    }
+
+    // TODO remove this function if the decimal array has the creator function
+    fn create_decimal_array(

Review comment:
       Tracked by https://github.com/apache/arrow-rs/issues/1009

##########
File path: arrow/src/compute/kernels/cast.rs
##########
@@ -281,6 +285,56 @@ macro_rules! cast_floating_point_to_decimal {
     }};
 }
 
+// cast the decimal array to integer array
+macro_rules! cast_decimal_to_integer {
+    ($ARRAY:expr, $SCALE : ident, $VALUE_BUILDER: ident, $NATIVE_TYPE : ident, $DATA_TYPE : expr) => {{
+        let array = $ARRAY.as_any().downcast_ref::<DecimalArray>().unwrap();
+        let mut value_builder = $VALUE_BUILDER::new(array.len());
+        let div: i128 = 10_i128.pow(*$SCALE as u32);
+        let min_bound = ($NATIVE_TYPE::MIN) as i128;
+        let max_bound = ($NATIVE_TYPE::MAX) as i128;
+        for i in 0..array.len() {

Review comment:
       In general I think the use of Builders is not as fast as constructing arrays using `FromIter` as the bounds are checked on each element
   
   In the following case, if we could structure the code in the following way
   
   ```rust
   let output_array: $OUTPUT_ARRAY_TYPE = array
     .iter()
     .map(|v| {
       v.map(|v| { 
       // scale the value v
       })
     }).collect()
   ```
   
   It may be significantly faster. 
   
   Perhaps a good follow on PR

##########
File path: arrow/src/compute/kernels/cast.rs
##########
@@ -1906,26 +1987,193 @@ where
 mod tests {
     use super::*;
     use crate::{buffer::Buffer, util::display::array_value_to_string};
-    use num::traits::Pow;
+
+    macro_rules! generate_cast_test_case {
+        ($INPUT_ARRAY: expr, $INPUT_ARRAY_TYPE: expr, $OUTPUT_TYPE_ARRAY: ident, $OUTPUT_TYPE: expr, $OUTPUT_VALUES: expr) => {
+            // assert cast type
+            assert!(can_cast_types($INPUT_ARRAY_TYPE, $OUTPUT_TYPE));
+            let casted_array = cast($INPUT_ARRAY, $OUTPUT_TYPE).unwrap();
+            let result_array = casted_array
+                .as_any()
+                .downcast_ref::<$OUTPUT_TYPE_ARRAY>()
+                .unwrap();
+            assert_eq!($OUTPUT_TYPE, result_array.data_type());
+            assert_eq!(result_array.len(), $OUTPUT_VALUES.len());
+            for (i, x) in $OUTPUT_VALUES.iter().enumerate() {
+                match x {
+                    Some(x) => {
+                        assert_eq!(result_array.value(i), *x);
+                    }
+                    None => {
+                        assert!(result_array.is_null(i));
+                    }
+                }
+            }
+        };
+    }
+
+    // TODO remove this function if the decimal array has the creator function
+    fn create_decimal_array(
+        array: &Vec<Option<i128>>,
+        precision: usize,
+        scale: usize,
+    ) -> Result<DecimalArray> {
+        let mut decimal_builder = DecimalBuilder::new(array.len(), precision, scale);
+        for value in array {
+            match value {
+                None => {
+                    decimal_builder.append_null()?;
+                }
+                Some(v) => {
+                    decimal_builder.append_value(*v)?;
+                }
+            }
+        }
+        Ok(decimal_builder.finish())
+    }
+
+    #[test]
+    fn test_cast_decimal_to_numeric() {
+        let decimal_type = DataType::Decimal(38, 2);
+        // negative test
+        assert!(!can_cast_types(&decimal_type, &DataType::UInt8));
+        let value_array: Vec<Option<i128>> =
+            vec![Some(125), Some(225), Some(325), None, Some(525)];
+        let decimal_array = create_decimal_array(&value_array, 38, 2).unwrap();
+        let array = Arc::new(decimal_array) as ArrayRef;
+        // i8
+        generate_cast_test_case!(
+            &array,
+            &decimal_type,
+            Int8Array,
+            &DataType::Int8,
+            vec![Some(1_i8), Some(2_i8), Some(3_i8), None, Some(5_i8)]
+        );
+        // i16
+        generate_cast_test_case!(
+            &array,
+            &decimal_type,
+            Int16Array,
+            &DataType::Int16,
+            vec![Some(1_i16), Some(2_i16), Some(3_i16), None, Some(5_i16)]
+        );
+        // i32
+        generate_cast_test_case!(
+            &array,
+            &decimal_type,
+            Int32Array,
+            &DataType::Int32,
+            vec![Some(1_i32), Some(2_i32), Some(3_i32), None, Some(5_i32)]
+        );
+        // i64
+        generate_cast_test_case!(
+            &array,
+            &decimal_type,
+            Int64Array,
+            &DataType::Int64,
+            vec![Some(1_i64), Some(2_i64), Some(3_i64), None, Some(5_i64)]
+        );
+        // f32
+        generate_cast_test_case!(
+            &array,
+            &decimal_type,
+            Int64Array,
+            &DataType::Int64,
+            vec![Some(1_i64), Some(2_i64), Some(3_i64), None, Some(5_i64)]
+        );
+        // f64
+        generate_cast_test_case!(
+            &array,
+            &decimal_type,
+            Int64Array,
+            &DataType::Int64,
+            vec![Some(1_i64), Some(2_i64), Some(3_i64), None, Some(5_i64)]
+        );
+
+        // overflow test: out of range of max i8
+        let value_array: Vec<Option<i128>> = vec![Some(24400)];
+        let decimal_array = create_decimal_array(&value_array, 38, 2).unwrap();
+        let array = Arc::new(decimal_array) as ArrayRef;
+        let casted_array = cast(&array, &DataType::Int8);
+        assert_eq!(
+            "Cast error: value of 244 is out of range Int8".to_string(),
+            casted_array.unwrap_err().to_string()
+        );
+
+        // loss the precision: convert decimal to f32、f64
+        // f32
+        // 112345678_f32 and 112345679_f32 are same, so the 112345679_f32 will lose precision.
+        let value_array: Vec<Option<i128>> = vec![
+            Some(125),
+            Some(225),
+            Some(325),
+            None,
+            Some(525),
+            Some(112345678),
+            Some(112345679),
+        ];
+        let decimal_array = create_decimal_array(&value_array, 38, 2).unwrap();
+        let array = Arc::new(decimal_array) as ArrayRef;
+        generate_cast_test_case!(
+            &array,
+            &decimal_type,
+            Float32Array,
+            &DataType::Float32,
+            vec![
+                Some(1.25_f32),

Review comment:
       👍 

##########
File path: arrow/src/compute/kernels/cast.rs
##########
@@ -1906,26 +1987,193 @@ where
 mod tests {
     use super::*;
     use crate::{buffer::Buffer, util::display::array_value_to_string};
-    use num::traits::Pow;
+
+    macro_rules! generate_cast_test_case {
+        ($INPUT_ARRAY: expr, $INPUT_ARRAY_TYPE: expr, $OUTPUT_TYPE_ARRAY: ident, $OUTPUT_TYPE: expr, $OUTPUT_VALUES: expr) => {
+            // assert cast type
+            assert!(can_cast_types($INPUT_ARRAY_TYPE, $OUTPUT_TYPE));
+            let casted_array = cast($INPUT_ARRAY, $OUTPUT_TYPE).unwrap();
+            let result_array = casted_array
+                .as_any()
+                .downcast_ref::<$OUTPUT_TYPE_ARRAY>()
+                .unwrap();
+            assert_eq!($OUTPUT_TYPE, result_array.data_type());
+            assert_eq!(result_array.len(), $OUTPUT_VALUES.len());
+            for (i, x) in $OUTPUT_VALUES.iter().enumerate() {
+                match x {
+                    Some(x) => {
+                        assert_eq!(result_array.value(i), *x);
+                    }
+                    None => {
+                        assert!(result_array.is_null(i));
+                    }
+                }
+            }
+        };
+    }
+
+    // TODO remove this function if the decimal array has the creator function
+    fn create_decimal_array(
+        array: &Vec<Option<i128>>,
+        precision: usize,
+        scale: usize,
+    ) -> Result<DecimalArray> {
+        let mut decimal_builder = DecimalBuilder::new(array.len(), precision, scale);
+        for value in array {
+            match value {
+                None => {
+                    decimal_builder.append_null()?;
+                }
+                Some(v) => {
+                    decimal_builder.append_value(*v)?;
+                }
+            }
+        }
+        Ok(decimal_builder.finish())
+    }
+
+    #[test]
+    fn test_cast_decimal_to_numeric() {
+        let decimal_type = DataType::Decimal(38, 2);
+        // negative test
+        assert!(!can_cast_types(&decimal_type, &DataType::UInt8));
+        let value_array: Vec<Option<i128>> =
+            vec![Some(125), Some(225), Some(325), None, Some(525)];
+        let decimal_array = create_decimal_array(&value_array, 38, 2).unwrap();
+        let array = Arc::new(decimal_array) as ArrayRef;
+        // i8
+        generate_cast_test_case!(
+            &array,
+            &decimal_type,
+            Int8Array,
+            &DataType::Int8,
+            vec![Some(1_i8), Some(2_i8), Some(3_i8), None, Some(5_i8)]
+        );
+        // i16
+        generate_cast_test_case!(
+            &array,
+            &decimal_type,
+            Int16Array,
+            &DataType::Int16,
+            vec![Some(1_i16), Some(2_i16), Some(3_i16), None, Some(5_i16)]
+        );
+        // i32
+        generate_cast_test_case!(
+            &array,
+            &decimal_type,
+            Int32Array,
+            &DataType::Int32,
+            vec![Some(1_i32), Some(2_i32), Some(3_i32), None, Some(5_i32)]
+        );
+        // i64
+        generate_cast_test_case!(
+            &array,
+            &decimal_type,
+            Int64Array,
+            &DataType::Int64,
+            vec![Some(1_i64), Some(2_i64), Some(3_i64), None, Some(5_i64)]
+        );
+        // f32
+        generate_cast_test_case!(
+            &array,
+            &decimal_type,
+            Int64Array,
+            &DataType::Int64,
+            vec![Some(1_i64), Some(2_i64), Some(3_i64), None, Some(5_i64)]
+        );
+        // f64
+        generate_cast_test_case!(
+            &array,
+            &decimal_type,
+            Int64Array,
+            &DataType::Int64,
+            vec![Some(1_i64), Some(2_i64), Some(3_i64), None, Some(5_i64)]
+        );
+
+        // overflow test: out of range of max i8

Review comment:
       👍 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org