You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2022/10/23 07:08:09 UTC

[arrow-rs] branch master updated: Support decimal256 array in sort kernels (#2912)

This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 22e742bc7 Support decimal256 array in sort kernels (#2912)
22e742bc7 is described below

commit 22e742bc76b5db333ab2384a92c362662cca3879
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Sun Oct 23 00:08:05 2022 -0700

    Support decimal256 array in sort kernels (#2912)
    
    * Support decimal256 array in sort kernels
    
    * Add i256::MAX and i256::MIN cases
---
 arrow/src/compute/kernels/sort.rs | 465 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 443 insertions(+), 22 deletions(-)

diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs
index 6720a0c5c..e2e20e756 100644
--- a/arrow/src/compute/kernels/sort.rs
+++ b/arrow/src/compute/kernels/sort.rs
@@ -149,6 +149,9 @@ pub fn sort_to_indices(
         DataType::Decimal128(_, _) => {
             sort_primitive::<Decimal128Type, _>(values, v, n, cmp, &options, limit)
         }
+        DataType::Decimal256(_, _) => {
+            sort_primitive::<Decimal256Type, _>(values, v, n, cmp, &options, limit)
+        }
         DataType::Boolean => sort_boolean(values, v, n, &options, limit),
         DataType::Int8 => {
             sort_primitive::<Int8Type, _>(values, v, n, cmp, &options, limit)
@@ -1077,39 +1080,77 @@ fn sort_valids_array<T>(
 #[cfg(test)]
 mod tests {
     use super::*;
+    use arrow_buffer::i256;
     use rand::rngs::StdRng;
     use rand::{Rng, RngCore, SeedableRng};
     use std::convert::TryFrom;
     use std::sync::Arc;
 
-    fn create_decimal_array(data: Vec<Option<i128>>) -> Decimal128Array {
+    fn create_decimal128_array(data: Vec<Option<i128>>) -> Decimal128Array {
         data.into_iter()
             .collect::<Decimal128Array>()
             .with_precision_and_scale(23, 6)
             .unwrap()
     }
 
-    fn test_sort_to_indices_decimal_array(
+    fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
+        data.into_iter()
+            .collect::<Decimal256Array>()
+            .with_precision_and_scale(53, 6)
+            .unwrap()
+    }
+
+    fn test_sort_to_indices_decimal128_array(
         data: Vec<Option<i128>>,
         options: Option<SortOptions>,
         limit: Option<usize>,
         expected_data: Vec<u32>,
     ) {
-        let output = create_decimal_array(data);
+        let output = create_decimal128_array(data);
+        let expected = UInt32Array::from(expected_data);
+        let output =
+            sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
+        assert_eq!(output, expected)
+    }
+
+    fn test_sort_to_indices_decimal256_array(
+        data: Vec<Option<i256>>,
+        options: Option<SortOptions>,
+        limit: Option<usize>,
+        expected_data: Vec<u32>,
+    ) {
+        let output = create_decimal256_array(data);
         let expected = UInt32Array::from(expected_data);
         let output =
             sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
         assert_eq!(output, expected)
     }
 
-    fn test_sort_decimal_array(
+    fn test_sort_decimal128_array(
         data: Vec<Option<i128>>,
         options: Option<SortOptions>,
         limit: Option<usize>,
         expected_data: Vec<Option<i128>>,
     ) {
-        let output = create_decimal_array(data);
-        let expected = Arc::new(create_decimal_array(expected_data)) as ArrayRef;
+        let output = create_decimal128_array(data);
+        let expected = Arc::new(create_decimal128_array(expected_data)) as ArrayRef;
+        let output = match limit {
+            Some(_) => {
+                sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap()
+            }
+            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
+        };
+        assert_eq!(&output, &expected)
+    }
+
+    fn test_sort_decimal256_array(
+        data: Vec<Option<i256>>,
+        options: Option<SortOptions>,
+        limit: Option<usize>,
+        expected_data: Vec<Option<i256>>,
+    ) {
+        let output = create_decimal256_array(data);
+        let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef;
         let output = match limit {
             Some(_) => {
                 sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap()
@@ -1758,14 +1799,14 @@ mod tests {
     #[test]
     fn test_sort_indices_decimal128() {
         // decimal default
-        test_sort_to_indices_decimal_array(
+        test_sort_to_indices_decimal128_array(
             vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
             None,
             None,
             vec![0, 6, 4, 2, 3, 5, 1],
         );
         // decimal descending
-        test_sort_to_indices_decimal_array(
+        test_sort_to_indices_decimal128_array(
             vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
             Some(SortOptions {
                 descending: true,
@@ -1775,7 +1816,7 @@ mod tests {
             vec![1, 5, 3, 2, 4, 6, 0],
         );
         // decimal null_first and descending
-        test_sort_to_indices_decimal_array(
+        test_sort_to_indices_decimal128_array(
             vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
             Some(SortOptions {
                 descending: true,
@@ -1785,7 +1826,7 @@ mod tests {
             vec![6, 0, 1, 5, 3, 2, 4],
         );
         // decimal null_first
-        test_sort_to_indices_decimal_array(
+        test_sort_to_indices_decimal128_array(
             vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
             Some(SortOptions {
                 descending: false,
@@ -1795,14 +1836,14 @@ mod tests {
             vec![0, 6, 4, 2, 3, 5, 1],
         );
         // limit
-        test_sort_to_indices_decimal_array(
+        test_sort_to_indices_decimal128_array(
             vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
             None,
             Some(3),
             vec![0, 6, 4],
         );
         // limit descending
-        test_sort_to_indices_decimal_array(
+        test_sort_to_indices_decimal128_array(
             vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
             Some(SortOptions {
                 descending: true,
@@ -1812,7 +1853,7 @@ mod tests {
             vec![1, 5, 3],
         );
         // limit descending null_first
-        test_sort_to_indices_decimal_array(
+        test_sort_to_indices_decimal128_array(
             vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
             Some(SortOptions {
                 descending: true,
@@ -1822,7 +1863,7 @@ mod tests {
             vec![6, 0, 1],
         );
         // limit null_first
-        test_sort_to_indices_decimal_array(
+        test_sort_to_indices_decimal128_array(
             vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
             Some(SortOptions {
                 descending: false,
@@ -1833,17 +1874,186 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_sort_indices_decimal256() {
+        // decimal default
+        test_sort_to_indices_decimal256_array(
+            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+            None,
+            None,
+            vec![0, 6, 4, 2, 3, 5, 1],
+        );
+        // decimal descending
+        test_sort_to_indices_decimal256_array(
+            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+            Some(SortOptions {
+                descending: true,
+                nulls_first: false,
+            }),
+            None,
+            vec![1, 5, 3, 2, 4, 6, 0],
+        );
+        // decimal null_first and descending
+        test_sort_to_indices_decimal256_array(
+            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            None,
+            vec![6, 0, 1, 5, 3, 2, 4],
+        );
+        // decimal null_first
+        test_sort_to_indices_decimal256_array(
+            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+            Some(SortOptions {
+                descending: false,
+                nulls_first: true,
+            }),
+            None,
+            vec![0, 6, 4, 2, 3, 5, 1],
+        );
+        // limit
+        test_sort_to_indices_decimal256_array(
+            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+            None,
+            Some(3),
+            vec![0, 6, 4],
+        );
+        // limit descending
+        test_sort_to_indices_decimal256_array(
+            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+            Some(SortOptions {
+                descending: true,
+                nulls_first: false,
+            }),
+            Some(3),
+            vec![1, 5, 3],
+        );
+        // limit descending null_first
+        test_sort_to_indices_decimal256_array(
+            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            Some(3),
+            vec![6, 0, 1],
+        );
+        // limit null_first
+        test_sort_to_indices_decimal256_array(
+            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+            Some(SortOptions {
+                descending: false,
+                nulls_first: true,
+            }),
+            Some(3),
+            vec![0, 6, 4],
+        );
+    }
+
+    #[test]
+    fn test_sort_indices_decimal256_max_min() {
+        test_sort_to_indices_decimal256_array(
+            vec![
+                None,
+                Some(i256::MIN),
+                Some(i256::from_i128(1)),
+                Some(i256::MAX),
+                Some(i256::from_i128(-1)),
+            ],
+            Some(SortOptions {
+                descending: false,
+                nulls_first: true,
+            }),
+            None,
+            vec![0, 1, 4, 2, 3],
+        );
+
+        test_sort_to_indices_decimal256_array(
+            vec![
+                None,
+                Some(i256::MIN),
+                Some(i256::from_i128(1)),
+                Some(i256::MAX),
+                Some(i256::from_i128(-1)),
+            ],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            None,
+            vec![0, 3, 2, 4, 1],
+        );
+
+        test_sort_to_indices_decimal256_array(
+            vec![
+                None,
+                Some(i256::MIN),
+                Some(i256::from_i128(1)),
+                Some(i256::MAX),
+                Some(i256::from_i128(-1)),
+            ],
+            Some(SortOptions {
+                descending: false,
+                nulls_first: true,
+            }),
+            Some(4),
+            vec![0, 1, 4, 2],
+        );
+
+        test_sort_to_indices_decimal256_array(
+            vec![
+                None,
+                Some(i256::MIN),
+                Some(i256::from_i128(1)),
+                Some(i256::MAX),
+                Some(i256::from_i128(-1)),
+            ],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            Some(4),
+            vec![0, 3, 2, 4],
+        );
+    }
+
     #[test]
     fn test_sort_decimal128() {
         // decimal default
-        test_sort_decimal_array(
+        test_sort_decimal128_array(
             vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
             None,
             None,
             vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
         );
         // decimal descending
-        test_sort_decimal_array(
+        test_sort_decimal128_array(
             vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
             Some(SortOptions {
                 descending: true,
@@ -1853,7 +2063,7 @@ mod tests {
             vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
         );
         // decimal null_first and descending
-        test_sort_decimal_array(
+        test_sort_decimal128_array(
             vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
             Some(SortOptions {
                 descending: true,
@@ -1863,7 +2073,7 @@ mod tests {
             vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
         );
         // decimal null_first
-        test_sort_decimal_array(
+        test_sort_decimal128_array(
             vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
             Some(SortOptions {
                 descending: false,
@@ -1873,14 +2083,14 @@ mod tests {
             vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
         );
         // limit
-        test_sort_decimal_array(
+        test_sort_decimal128_array(
             vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
             None,
             Some(3),
             vec![None, None, Some(1)],
         );
         // limit descending
-        test_sort_decimal_array(
+        test_sort_decimal128_array(
             vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
             Some(SortOptions {
                 descending: true,
@@ -1890,7 +2100,7 @@ mod tests {
             vec![Some(5), Some(4), Some(3)],
         );
         // limit descending null_first
-        test_sort_decimal_array(
+        test_sort_decimal128_array(
             vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
             Some(SortOptions {
                 descending: true,
@@ -1900,7 +2110,7 @@ mod tests {
             vec![None, None, Some(5)],
         );
         // limit null_first
-        test_sort_decimal_array(
+        test_sort_decimal128_array(
             vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
             Some(SortOptions {
                 descending: false,
@@ -1911,6 +2121,217 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_sort_decimal256() {
+        // decimal default
+        test_sort_decimal256_array(
+            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+            None,
+            None,
+            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+        );
+        // decimal descending
+        test_sort_decimal256_array(
+            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+            Some(SortOptions {
+                descending: true,
+                nulls_first: false,
+            }),
+            None,
+            vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+        );
+        // decimal null_first and descending
+        test_sort_decimal256_array(
+            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            None,
+            vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+        );
+        // decimal null_first
+        test_sort_decimal256_array(
+            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+            Some(SortOptions {
+                descending: false,
+                nulls_first: true,
+            }),
+            None,
+            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+        );
+        // limit
+        test_sort_decimal256_array(
+            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+            None,
+            Some(3),
+            vec![None, None, Some(1)]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+        );
+        // limit descending
+        test_sort_decimal256_array(
+            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+            Some(SortOptions {
+                descending: true,
+                nulls_first: false,
+            }),
+            Some(3),
+            vec![Some(5), Some(4), Some(3)]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+        );
+        // limit descending null_first
+        test_sort_decimal256_array(
+            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            Some(3),
+            vec![None, None, Some(5)]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+        );
+        // limit null_first
+        test_sort_decimal256_array(
+            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+            Some(SortOptions {
+                descending: false,
+                nulls_first: true,
+            }),
+            Some(3),
+            vec![None, None, Some(1)]
+                .iter()
+                .map(|v| v.map(i256::from_i128))
+                .collect(),
+        );
+    }
+
+    #[test]
+    fn test_sort_decimal256_max_min() {
+        test_sort_decimal256_array(
+            vec![
+                None,
+                Some(i256::MIN),
+                Some(i256::from_i128(1)),
+                Some(i256::MAX),
+                Some(i256::from_i128(-1)),
+                None,
+            ],
+            Some(SortOptions {
+                descending: false,
+                nulls_first: true,
+            }),
+            None,
+            vec![
+                None,
+                None,
+                Some(i256::MIN),
+                Some(i256::from_i128(-1)),
+                Some(i256::from_i128(1)),
+                Some(i256::MAX),
+            ],
+        );
+
+        test_sort_decimal256_array(
+            vec![
+                None,
+                Some(i256::MIN),
+                Some(i256::from_i128(1)),
+                Some(i256::MAX),
+                Some(i256::from_i128(-1)),
+                None,
+            ],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            None,
+            vec![
+                None,
+                None,
+                Some(i256::MAX),
+                Some(i256::from_i128(1)),
+                Some(i256::from_i128(-1)),
+                Some(i256::MIN),
+            ],
+        );
+
+        test_sort_decimal256_array(
+            vec![
+                None,
+                Some(i256::MIN),
+                Some(i256::from_i128(1)),
+                Some(i256::MAX),
+                Some(i256::from_i128(-1)),
+                None,
+            ],
+            Some(SortOptions {
+                descending: false,
+                nulls_first: true,
+            }),
+            Some(4),
+            vec![None, None, Some(i256::MIN), Some(i256::from_i128(-1))],
+        );
+
+        test_sort_decimal256_array(
+            vec![
+                None,
+                Some(i256::MIN),
+                Some(i256::from_i128(1)),
+                Some(i256::MAX),
+                Some(i256::from_i128(-1)),
+                None,
+            ],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            Some(4),
+            vec![None, None, Some(i256::MAX), Some(i256::from_i128(1))],
+        );
+    }
+
     #[test]
     fn test_sort_primitives() {
         // default case