You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/10/31 17:26:21 UTC

[arrow-rs] branch master updated: Fix ignored limit on `lexsort_to_indices` (#2991)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 66c963674 Fix ignored limit on `lexsort_to_indices` (#2991)
66c963674 is described below

commit 66c9636742162f832b434a513769e158f9723e67
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Mon Oct 31 13:26:15 2022 -0400

    Fix ignored limit on `lexsort_to_indices` (#2991)
    
    * Fix ignored limit on lexsort_to_indices
    
    * Update comments
    
    * Update arrow/src/compute/kernels/sort.rs
    
    Co-authored-by: Batuhan Taskaya <is...@gmail.com>
    
    Co-authored-by: Batuhan Taskaya <is...@gmail.com>
---
 arrow/src/compute/kernels/sort.rs | 40 ++++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs
index b29762264..a10e674ac 100644
--- a/arrow/src/compute/kernels/sort.rs
+++ b/arrow/src/compute/kernels/sort.rs
@@ -950,7 +950,7 @@ pub fn lexsort_to_indices(
     });
 
     Ok(UInt32Array::from_iter_values(
-        value_indices.iter().map(|i| *i as u32),
+        value_indices.iter().take(len).map(|i| *i as u32),
     ))
 }
 
@@ -1422,6 +1422,18 @@ mod tests {
         }
     }
 
+    /// Test helper: slices every array in `expected_output` using the given `offset` and `length`.
+    fn slice_arrays(
+        expected_output: Vec<ArrayRef>,
+        offset: usize,
+        length: usize,
+    ) -> Vec<ArrayRef> {
+        expected_output
+            .into_iter()
+            .map(|array| array.slice(offset, length))
+            .collect()
+    }
+
     fn test_sort_binary_arrays(
         data: Vec<Option<Vec<u8>>>,
         options: Option<SortOptions>,
@@ -3439,8 +3451,10 @@ mod tests {
             Some(2),
             Some(17),
         ])) as ArrayRef];
-        test_lex_sort_arrays(input.clone(), expected, None);
+        test_lex_sort_arrays(input.clone(), expected.clone(), None);
+        test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));
 
+        // Explicitly test a limit on the sort as a demonstration
         let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
             Some(-1),
             Some(0),
@@ -3519,7 +3533,8 @@ mod tests {
                 Some(-2),
             ])) as ArrayRef,
         ];
-        test_lex_sort_arrays(input, expected, None);
+        test_lex_sort_arrays(input.clone(), expected.clone(), None);
+        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
 
         // test mix of string and in64 with option
         let input = vec![
@@ -3562,7 +3577,8 @@ mod tests {
                 Some("7"),
             ])) as ArrayRef,
         ];
-        test_lex_sort_arrays(input, expected, None);
+        test_lex_sort_arrays(input.clone(), expected.clone(), None);
+        test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
 
         // test sort with nulls first
         let input = vec![
@@ -3605,7 +3621,8 @@ mod tests {
                 Some("world"),
             ])) as ArrayRef,
         ];
-        test_lex_sort_arrays(input, expected, None);
+        test_lex_sort_arrays(input.clone(), expected.clone(), None);
+        test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
 
         // test sort with nulls last
         let input = vec![
@@ -3648,7 +3665,8 @@ mod tests {
                 None,
             ])) as ArrayRef,
         ];
-        test_lex_sort_arrays(input, expected, None);
+        test_lex_sort_arrays(input.clone(), expected.clone(), None);
+        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
 
         // test sort with opposite options
         let input = vec![
@@ -3695,7 +3713,15 @@ mod tests {
                 Some("foo"),
             ])) as ArrayRef,
         ];
-        test_lex_sort_arrays(input, expected, None);
+        test_lex_sort_arrays(input.clone(), expected.clone(), None);
+        test_lex_sort_arrays(
+            input.clone(),
+            slice_arrays(expected.clone(), 0, 5),
+            Some(5),
+        );
+
+        // Limiting by more rows than present is ok
+        test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
     }
 
     #[test]