You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/10/31 17:32:38 UTC
[arrow-rs] 01/02: Fix ignored limit on `lexsort_to_indices` (#2991)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to tag 26.0.0
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
commit fb2b49c202de28a166cab4fbbe84e0024ed706d4
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Mon Oct 31 13:26:15 2022 -0400
Fix ignored limit on `lexsort_to_indices` (#2991)
* Fix ignored limit on lexsort_to_indices
* Update comments
* Update arrow/src/compute/kernels/sort.rs
Co-authored-by: Batuhan Taskaya <is...@gmail.com>
Co-authored-by: Batuhan Taskaya <is...@gmail.com>
---
arrow/src/compute/kernels/sort.rs | 40 ++++++++++++++++++++++++++++++++-------
1 file changed, 33 insertions(+), 7 deletions(-)
diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs
index b29762264..a10e674ac 100644
--- a/arrow/src/compute/kernels/sort.rs
+++ b/arrow/src/compute/kernels/sort.rs
@@ -950,7 +950,7 @@ pub fn lexsort_to_indices(
});
Ok(UInt32Array::from_iter_values(
- value_indices.iter().map(|i| *i as u32),
+ value_indices.iter().take(len).map(|i| *i as u32),
))
}
@@ -1422,6 +1422,18 @@ mod tests {
}
}
+ /// Consumes `expected_output` and returns a new `Vec` holding `array.slice(offset, length)` for each array, in order.
+ fn slice_arrays(
+ expected_output: Vec<ArrayRef>,
+ offset: usize,
+ length: usize,
+ ) -> Vec<ArrayRef> {
+ expected_output
+ .into_iter()
+ .map(|array| array.slice(offset, length))
+ .collect()
+ }
+
fn test_sort_binary_arrays(
data: Vec<Option<Vec<u8>>>,
options: Option<SortOptions>,
@@ -3439,8 +3451,10 @@ mod tests {
Some(2),
Some(17),
])) as ArrayRef];
- test_lex_sort_arrays(input.clone(), expected, None);
+ test_lex_sort_arrays(input.clone(), expected.clone(), None);
+ test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));
+ // Explicitly test a limit on the sort as a demonstration
let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
Some(0),
@@ -3519,7 +3533,8 @@ mod tests {
Some(-2),
])) as ArrayRef,
];
- test_lex_sort_arrays(input, expected, None);
+ test_lex_sort_arrays(input.clone(), expected.clone(), None);
+ test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
// test mix of string and in64 with option
let input = vec![
@@ -3562,7 +3577,8 @@ mod tests {
Some("7"),
])) as ArrayRef,
];
- test_lex_sort_arrays(input, expected, None);
+ test_lex_sort_arrays(input.clone(), expected.clone(), None);
+ test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
// test sort with nulls first
let input = vec![
@@ -3605,7 +3621,8 @@ mod tests {
Some("world"),
])) as ArrayRef,
];
- test_lex_sort_arrays(input, expected, None);
+ test_lex_sort_arrays(input.clone(), expected.clone(), None);
+ test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
// test sort with nulls last
let input = vec![
@@ -3648,7 +3665,8 @@ mod tests {
None,
])) as ArrayRef,
];
- test_lex_sort_arrays(input, expected, None);
+ test_lex_sort_arrays(input.clone(), expected.clone(), None);
+ test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
// test sort with opposite options
let input = vec![
@@ -3695,7 +3713,15 @@ mod tests {
Some("foo"),
])) as ArrayRef,
];
- test_lex_sort_arrays(input, expected, None);
+ test_lex_sort_arrays(input.clone(), expected.clone(), None);
+ test_lex_sort_arrays(
+ input.clone(),
+ slice_arrays(expected.clone(), 0, 5),
+ Some(5),
+ );
+
+ // Limiting by more rows than present is ok
+ test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
}
#[test]