You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2022/10/23 07:08:09 UTC
[arrow-rs] branch master updated: Support decimal256 array in sort kernels (#2912)
This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 22e742bc7 Support decimal256 array in sort kernels (#2912)
22e742bc7 is described below
commit 22e742bc76b5db333ab2384a92c362662cca3879
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Sun Oct 23 00:08:05 2022 -0700
Support decimal256 array in sort kernels (#2912)
* Support decimal256 array in sort kernels
* Add i256::MAX and i256::MIN cases
---
arrow/src/compute/kernels/sort.rs | 465 ++++++++++++++++++++++++++++++++++++--
1 file changed, 443 insertions(+), 22 deletions(-)
diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs
index 6720a0c5c..e2e20e756 100644
--- a/arrow/src/compute/kernels/sort.rs
+++ b/arrow/src/compute/kernels/sort.rs
@@ -149,6 +149,9 @@ pub fn sort_to_indices(
DataType::Decimal128(_, _) => {
sort_primitive::<Decimal128Type, _>(values, v, n, cmp, &options, limit)
}
+ DataType::Decimal256(_, _) => {
+ sort_primitive::<Decimal256Type, _>(values, v, n, cmp, &options, limit)
+ }
DataType::Boolean => sort_boolean(values, v, n, &options, limit),
DataType::Int8 => {
sort_primitive::<Int8Type, _>(values, v, n, cmp, &options, limit)
@@ -1077,39 +1080,77 @@ fn sort_valids_array<T>(
#[cfg(test)]
mod tests {
use super::*;
+ use arrow_buffer::i256;
use rand::rngs::StdRng;
use rand::{Rng, RngCore, SeedableRng};
use std::convert::TryFrom;
use std::sync::Arc;
- fn create_decimal_array(data: Vec<Option<i128>>) -> Decimal128Array {
+ fn create_decimal128_array(data: Vec<Option<i128>>) -> Decimal128Array {
data.into_iter()
.collect::<Decimal128Array>()
.with_precision_and_scale(23, 6)
.unwrap()
}
- fn test_sort_to_indices_decimal_array(
+ fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
+ data.into_iter()
+ .collect::<Decimal256Array>()
+ .with_precision_and_scale(53, 6)
+ .unwrap()
+ }
+
+ fn test_sort_to_indices_decimal128_array(
data: Vec<Option<i128>>,
options: Option<SortOptions>,
limit: Option<usize>,
expected_data: Vec<u32>,
) {
- let output = create_decimal_array(data);
+ let output = create_decimal128_array(data);
+ let expected = UInt32Array::from(expected_data);
+ let output =
+ sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
+ assert_eq!(output, expected)
+ }
+
+ fn test_sort_to_indices_decimal256_array(
+ data: Vec<Option<i256>>,
+ options: Option<SortOptions>,
+ limit: Option<usize>,
+ expected_data: Vec<u32>,
+ ) {
+ let output = create_decimal256_array(data);
let expected = UInt32Array::from(expected_data);
let output =
sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
assert_eq!(output, expected)
}
- fn test_sort_decimal_array(
+ fn test_sort_decimal128_array(
data: Vec<Option<i128>>,
options: Option<SortOptions>,
limit: Option<usize>,
expected_data: Vec<Option<i128>>,
) {
- let output = create_decimal_array(data);
- let expected = Arc::new(create_decimal_array(expected_data)) as ArrayRef;
+ let output = create_decimal128_array(data);
+ let expected = Arc::new(create_decimal128_array(expected_data)) as ArrayRef;
+ let output = match limit {
+ Some(_) => {
+ sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap()
+ }
+ _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
+ };
+ assert_eq!(&output, &expected)
+ }
+
+ fn test_sort_decimal256_array(
+ data: Vec<Option<i256>>,
+ options: Option<SortOptions>,
+ limit: Option<usize>,
+ expected_data: Vec<Option<i256>>,
+ ) {
+ let output = create_decimal256_array(data);
+ let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef;
let output = match limit {
Some(_) => {
sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap()
@@ -1758,14 +1799,14 @@ mod tests {
#[test]
fn test_sort_indices_decimal128() {
// decimal default
- test_sort_to_indices_decimal_array(
+ test_sort_to_indices_decimal128_array(
vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
None,
None,
vec![0, 6, 4, 2, 3, 5, 1],
);
// decimal descending
- test_sort_to_indices_decimal_array(
+ test_sort_to_indices_decimal128_array(
vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
Some(SortOptions {
descending: true,
@@ -1775,7 +1816,7 @@ mod tests {
vec![1, 5, 3, 2, 4, 6, 0],
);
// decimal null_first and descending
- test_sort_to_indices_decimal_array(
+ test_sort_to_indices_decimal128_array(
vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
Some(SortOptions {
descending: true,
@@ -1785,7 +1826,7 @@ mod tests {
vec![6, 0, 1, 5, 3, 2, 4],
);
// decimal null_first
- test_sort_to_indices_decimal_array(
+ test_sort_to_indices_decimal128_array(
vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
Some(SortOptions {
descending: false,
@@ -1795,14 +1836,14 @@ mod tests {
vec![0, 6, 4, 2, 3, 5, 1],
);
// limit
- test_sort_to_indices_decimal_array(
+ test_sort_to_indices_decimal128_array(
vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
None,
Some(3),
vec![0, 6, 4],
);
// limit descending
- test_sort_to_indices_decimal_array(
+ test_sort_to_indices_decimal128_array(
vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
Some(SortOptions {
descending: true,
@@ -1812,7 +1853,7 @@ mod tests {
vec![1, 5, 3],
);
// limit descending null_first
- test_sort_to_indices_decimal_array(
+ test_sort_to_indices_decimal128_array(
vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
Some(SortOptions {
descending: true,
@@ -1822,7 +1863,7 @@ mod tests {
vec![6, 0, 1],
);
// limit null_first
- test_sort_to_indices_decimal_array(
+ test_sort_to_indices_decimal128_array(
vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
Some(SortOptions {
descending: false,
@@ -1833,17 +1874,186 @@ mod tests {
);
}
+ #[test]
+ fn test_sort_indices_decimal256() {
+ // decimal default
+ test_sort_to_indices_decimal256_array(
+ vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ None,
+ None,
+ vec![0, 6, 4, 2, 3, 5, 1],
+ );
+ // decimal descending
+ test_sort_to_indices_decimal256_array(
+ vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ Some(SortOptions {
+ descending: true,
+ nulls_first: false,
+ }),
+ None,
+ vec![1, 5, 3, 2, 4, 6, 0],
+ );
+ // decimal null_first and descending
+ test_sort_to_indices_decimal256_array(
+ vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ Some(SortOptions {
+ descending: true,
+ nulls_first: true,
+ }),
+ None,
+ vec![6, 0, 1, 5, 3, 2, 4],
+ );
+ // decimal null_first
+ test_sort_to_indices_decimal256_array(
+ vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ Some(SortOptions {
+ descending: false,
+ nulls_first: true,
+ }),
+ None,
+ vec![0, 6, 4, 2, 3, 5, 1],
+ );
+ // limit
+ test_sort_to_indices_decimal256_array(
+ vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ None,
+ Some(3),
+ vec![0, 6, 4],
+ );
+ // limit descending
+ test_sort_to_indices_decimal256_array(
+ vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ Some(SortOptions {
+ descending: true,
+ nulls_first: false,
+ }),
+ Some(3),
+ vec![1, 5, 3],
+ );
+ // limit descending null_first
+ test_sort_to_indices_decimal256_array(
+ vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ Some(SortOptions {
+ descending: true,
+ nulls_first: true,
+ }),
+ Some(3),
+ vec![6, 0, 1],
+ );
+ // limit null_first
+ test_sort_to_indices_decimal256_array(
+ vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ Some(SortOptions {
+ descending: false,
+ nulls_first: true,
+ }),
+ Some(3),
+ vec![0, 6, 4],
+ );
+ }
+
+ #[test]
+ fn test_sort_indices_decimal256_max_min() {
+ test_sort_to_indices_decimal256_array(
+ vec![
+ None,
+ Some(i256::MIN),
+ Some(i256::from_i128(1)),
+ Some(i256::MAX),
+ Some(i256::from_i128(-1)),
+ ],
+ Some(SortOptions {
+ descending: false,
+ nulls_first: true,
+ }),
+ None,
+ vec![0, 1, 4, 2, 3],
+ );
+
+ test_sort_to_indices_decimal256_array(
+ vec![
+ None,
+ Some(i256::MIN),
+ Some(i256::from_i128(1)),
+ Some(i256::MAX),
+ Some(i256::from_i128(-1)),
+ ],
+ Some(SortOptions {
+ descending: true,
+ nulls_first: true,
+ }),
+ None,
+ vec![0, 3, 2, 4, 1],
+ );
+
+ test_sort_to_indices_decimal256_array(
+ vec![
+ None,
+ Some(i256::MIN),
+ Some(i256::from_i128(1)),
+ Some(i256::MAX),
+ Some(i256::from_i128(-1)),
+ ],
+ Some(SortOptions {
+ descending: false,
+ nulls_first: true,
+ }),
+ Some(4),
+ vec![0, 1, 4, 2],
+ );
+
+ test_sort_to_indices_decimal256_array(
+ vec![
+ None,
+ Some(i256::MIN),
+ Some(i256::from_i128(1)),
+ Some(i256::MAX),
+ Some(i256::from_i128(-1)),
+ ],
+ Some(SortOptions {
+ descending: true,
+ nulls_first: true,
+ }),
+ Some(4),
+ vec![0, 3, 2, 4],
+ );
+ }
+
#[test]
fn test_sort_decimal128() {
// decimal default
- test_sort_decimal_array(
+ test_sort_decimal128_array(
vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
None,
None,
vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
);
// decimal descending
- test_sort_decimal_array(
+ test_sort_decimal128_array(
vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
Some(SortOptions {
descending: true,
@@ -1853,7 +2063,7 @@ mod tests {
vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
);
// decimal null_first and descending
- test_sort_decimal_array(
+ test_sort_decimal128_array(
vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
Some(SortOptions {
descending: true,
@@ -1863,7 +2073,7 @@ mod tests {
vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
);
// decimal null_first
- test_sort_decimal_array(
+ test_sort_decimal128_array(
vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
Some(SortOptions {
descending: false,
@@ -1873,14 +2083,14 @@ mod tests {
vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
);
// limit
- test_sort_decimal_array(
+ test_sort_decimal128_array(
vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
None,
Some(3),
vec![None, None, Some(1)],
);
// limit descending
- test_sort_decimal_array(
+ test_sort_decimal128_array(
vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
Some(SortOptions {
descending: true,
@@ -1890,7 +2100,7 @@ mod tests {
vec![Some(5), Some(4), Some(3)],
);
// limit descending null_first
- test_sort_decimal_array(
+ test_sort_decimal128_array(
vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
Some(SortOptions {
descending: true,
@@ -1900,7 +2110,7 @@ mod tests {
vec![None, None, Some(5)],
);
// limit null_first
- test_sort_decimal_array(
+ test_sort_decimal128_array(
vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
Some(SortOptions {
descending: false,
@@ -1911,6 +2121,217 @@ mod tests {
);
}
+ #[test]
+ fn test_sort_decimal256() {
+ // decimal default
+ test_sort_decimal256_array(
+ vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ None,
+ None,
+ vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ );
+ // decimal descending
+ test_sort_decimal256_array(
+ vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ Some(SortOptions {
+ descending: true,
+ nulls_first: false,
+ }),
+ None,
+ vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ );
+ // decimal null_first and descending
+ test_sort_decimal256_array(
+ vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ Some(SortOptions {
+ descending: true,
+ nulls_first: true,
+ }),
+ None,
+ vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ );
+ // decimal null_first
+ test_sort_decimal256_array(
+ vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ Some(SortOptions {
+ descending: false,
+ nulls_first: true,
+ }),
+ None,
+ vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ );
+ // limit
+ test_sort_decimal256_array(
+ vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ None,
+ Some(3),
+ vec![None, None, Some(1)]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ );
+ // limit descending
+ test_sort_decimal256_array(
+ vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ Some(SortOptions {
+ descending: true,
+ nulls_first: false,
+ }),
+ Some(3),
+ vec![Some(5), Some(4), Some(3)]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ );
+ // limit descending null_first
+ test_sort_decimal256_array(
+ vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ Some(SortOptions {
+ descending: true,
+ nulls_first: true,
+ }),
+ Some(3),
+ vec![None, None, Some(5)]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ );
+ // limit null_first
+ test_sort_decimal256_array(
+ vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ Some(SortOptions {
+ descending: false,
+ nulls_first: true,
+ }),
+ Some(3),
+ vec![None, None, Some(1)]
+ .iter()
+ .map(|v| v.map(i256::from_i128))
+ .collect(),
+ );
+ }
+
+ #[test]
+ fn test_sort_decimal256_max_min() {
+ test_sort_decimal256_array(
+ vec![
+ None,
+ Some(i256::MIN),
+ Some(i256::from_i128(1)),
+ Some(i256::MAX),
+ Some(i256::from_i128(-1)),
+ None,
+ ],
+ Some(SortOptions {
+ descending: false,
+ nulls_first: true,
+ }),
+ None,
+ vec![
+ None,
+ None,
+ Some(i256::MIN),
+ Some(i256::from_i128(-1)),
+ Some(i256::from_i128(1)),
+ Some(i256::MAX),
+ ],
+ );
+
+ test_sort_decimal256_array(
+ vec![
+ None,
+ Some(i256::MIN),
+ Some(i256::from_i128(1)),
+ Some(i256::MAX),
+ Some(i256::from_i128(-1)),
+ None,
+ ],
+ Some(SortOptions {
+ descending: true,
+ nulls_first: true,
+ }),
+ None,
+ vec![
+ None,
+ None,
+ Some(i256::MAX),
+ Some(i256::from_i128(1)),
+ Some(i256::from_i128(-1)),
+ Some(i256::MIN),
+ ],
+ );
+
+ test_sort_decimal256_array(
+ vec![
+ None,
+ Some(i256::MIN),
+ Some(i256::from_i128(1)),
+ Some(i256::MAX),
+ Some(i256::from_i128(-1)),
+ None,
+ ],
+ Some(SortOptions {
+ descending: false,
+ nulls_first: true,
+ }),
+ Some(4),
+ vec![None, None, Some(i256::MIN), Some(i256::from_i128(-1))],
+ );
+
+ test_sort_decimal256_array(
+ vec![
+ None,
+ Some(i256::MIN),
+ Some(i256::from_i128(1)),
+ Some(i256::MAX),
+ Some(i256::from_i128(-1)),
+ None,
+ ],
+ Some(SortOptions {
+ descending: true,
+ nulls_first: true,
+ }),
+ Some(4),
+ vec![None, None, Some(i256::MAX), Some(i256::from_i128(1))],
+ );
+ }
+
#[test]
fn test_sort_primitives() {
// default case