You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ne...@apache.org on 2020/04/26 07:20:15 UTC

[arrow] branch master updated: ARROW-7705: [Rust] Initial sort implementation

This is an automated email from the ASF dual-hosted git repository.

nevime pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 908cfcc  ARROW-7705: [Rust] Initial sort implementation
908cfcc is described below

commit 908cfcc50301b8caefb5ae8cada0e7d138017bdd
Author: Neville Dipale <ne...@gmail.com>
AuthorDate: Sun Apr 26 09:19:41 2020 +0200

    ARROW-7705: [Rust] Initial sort implementation
    
    This implements `sort_to_indices` which returns an u32 array of indices that results in the array being sorted. It then implements `sort` by using `take` to sort the array.
    
    Similar to the approach taken in CPP, I think we can start with a correct approach, then focus on performance after. I will keep this PR a draft for comments and until I complete the initial sort implementation (including sorting in descending order).
    
    Closes #6306 from nevi-me/ARROW-7705
    
    Authored-by: Neville Dipale <ne...@gmail.com>
    Signed-off-by: Neville Dipale <ne...@gmail.com>
---
 rust/arrow/src/compute/kernels/mod.rs  |   1 +
 rust/arrow/src/compute/kernels/sort.rs | 671 +++++++++++++++++++++++++++++++++
 rust/parquet/src/record/api.rs         | 111 +++---
 3 files changed, 732 insertions(+), 51 deletions(-)

diff --git a/rust/arrow/src/compute/kernels/mod.rs b/rust/arrow/src/compute/kernels/mod.rs
index 4e5332c..e13e3fe 100644
--- a/rust/arrow/src/compute/kernels/mod.rs
+++ b/rust/arrow/src/compute/kernels/mod.rs
@@ -24,5 +24,6 @@ pub mod cast;
 pub mod comparison;
 pub mod filter;
 pub mod limit;
+pub mod sort;
 pub mod take;
 pub mod temporal;
diff --git a/rust/arrow/src/compute/kernels/sort.rs b/rust/arrow/src/compute/kernels/sort.rs
new file mode 100644
index 0000000..75e6067
--- /dev/null
+++ b/rust/arrow/src/compute/kernels/sort.rs
@@ -0,0 +1,671 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Defines sort kernel for `ArrayRef`
+
+use std::cmp::Reverse;
+
+use crate::array::*;
+use crate::compute::take;
+use crate::datatypes::*;
+use crate::error::{ArrowError, Result};
+
+use TimeUnit::*;
+
+/// Sort the `ArrayRef` using `SortOptions`.
+///
+/// Performs a stable sort on values and indices, returning nulls after sorted valid values,
+/// while preserving the order of the nulls.
+///
+/// Returns and `ArrowError::ComputeError(String)` if the array type is either unsupported by `sort_to_indices` or `take`.
+pub fn sort(values: &ArrayRef, options: Option<SortOptions>) -> Result<ArrayRef> {
+    let indices = sort_to_indices(values, options)?;
+    take(values, &indices, None)
+}
+
+/// Sort elements from `ArrayRef` into an unsigned integer (`UInt32Array`) of indices
+pub fn sort_to_indices(
+    values: &ArrayRef,
+    options: Option<SortOptions>,
+) -> Result<UInt32Array> {
+    let options = options.unwrap_or_default();
+    let range = values.offset()..values.len();
+    let (v, n): (Vec<usize>, Vec<usize>) =
+        range.partition(|index| values.is_valid(*index));
+    let n = n.into_iter().map(|i| i as u32).collect();
+    match values.data_type() {
+        DataType::Boolean => sort_primitive::<BooleanType>(values, v, n, &options),
+        DataType::Int8 => sort_primitive::<Int8Type>(values, v, n, &options),
+        DataType::Int16 => sort_primitive::<Int16Type>(values, v, n, &options),
+        DataType::Int32 => sort_primitive::<Int32Type>(values, v, n, &options),
+        DataType::Int64 => sort_primitive::<Int64Type>(values, v, n, &options),
+        DataType::UInt8 => sort_primitive::<UInt8Type>(values, v, n, &options),
+        DataType::UInt16 => sort_primitive::<UInt16Type>(values, v, n, &options),
+        DataType::UInt32 => sort_primitive::<UInt32Type>(values, v, n, &options),
+        DataType::UInt64 => sort_primitive::<UInt64Type>(values, v, n, &options),
+        DataType::Date32(_) => sort_primitive::<Date32Type>(values, v, n, &options),
+        DataType::Date64(_) => sort_primitive::<Date64Type>(values, v, n, &options),
+        DataType::Time32(Second) => {
+            sort_primitive::<Time32SecondType>(values, v, n, &options)
+        }
+        DataType::Time32(Millisecond) => {
+            sort_primitive::<Time32MillisecondType>(values, v, n, &options)
+        }
+        DataType::Time64(Microsecond) => {
+            sort_primitive::<Time64MicrosecondType>(values, v, n, &options)
+        }
+        DataType::Time64(Nanosecond) => {
+            sort_primitive::<Time64NanosecondType>(values, v, n, &options)
+        }
+        DataType::Timestamp(Second, _) => {
+            sort_primitive::<TimestampSecondType>(values, v, n, &options)
+        }
+        DataType::Timestamp(Millisecond, _) => {
+            sort_primitive::<TimestampMillisecondType>(values, v, n, &options)
+        }
+        DataType::Timestamp(Microsecond, _) => {
+            sort_primitive::<TimestampMicrosecondType>(values, v, n, &options)
+        }
+        DataType::Timestamp(Nanosecond, _) => {
+            sort_primitive::<TimestampNanosecondType>(values, v, n, &options)
+        }
+        DataType::Interval(IntervalUnit::YearMonth) => {
+            sort_primitive::<IntervalYearMonthType>(values, v, n, &options)
+        }
+        DataType::Interval(IntervalUnit::DayTime) => {
+            sort_primitive::<IntervalDayTimeType>(values, v, n, &options)
+        }
+        DataType::Duration(TimeUnit::Second) => {
+            sort_primitive::<DurationSecondType>(values, v, n, &options)
+        }
+        DataType::Duration(TimeUnit::Millisecond) => {
+            sort_primitive::<DurationMillisecondType>(values, v, n, &options)
+        }
+        DataType::Duration(TimeUnit::Microsecond) => {
+            sort_primitive::<DurationMicrosecondType>(values, v, n, &options)
+        }
+        DataType::Duration(TimeUnit::Nanosecond) => {
+            sort_primitive::<DurationNanosecondType>(values, v, n, &options)
+        }
+        DataType::Utf8 => sort_string(values, v, n, &options),
+        t => Err(ArrowError::ComputeError(format!(
+            "Sort not supported for data type {:?}",
+            t
+        ))),
+    }
+}
+
+/// Options that define how sort kernels should behave
+#[derive(Clone)]
+pub struct SortOptions {
+    /// Whether to sort in descending order
+    descending: bool,
+    /// Whether to sort nulls first
+    nulls_first: bool,
+}
+
+impl Default for SortOptions {
+    fn default() -> Self {
+        Self {
+            descending: false,
+            nulls_first: false,
+        }
+    }
+}
+
+/// Sort primitive values
+fn sort_primitive<T>(
+    values: &ArrayRef,
+    value_indices: Vec<usize>,
+    null_indices: Vec<u32>,
+    options: &SortOptions,
+) -> Result<UInt32Array>
+where
+    T: ArrowPrimitiveType,
+    T::Native: std::cmp::Ord,
+{
+    let values = values
+        .as_any()
+        .downcast_ref::<PrimitiveArray<T>>()
+        .expect("Unable to downcast to primitive array");
+    // create tuples that are used for sorting
+    let mut valids = value_indices
+        .into_iter()
+        .map(|index| (index as u32, values.value(index)))
+        .collect::<Vec<(u32, T::Native)>>();
+    let mut nulls = null_indices;
+    if !options.descending {
+        valids.sort_by_key(|a| a.1);
+    } else {
+        valids.sort_by_key(|a| Reverse(a.1));
+        nulls.reverse();
+    }
+    // collect the order of valid tuples
+    let mut valid_indices: Vec<u32> = valids.iter().map(|tuple| tuple.0).collect();
+
+    if options.nulls_first {
+        nulls.append(&mut valid_indices);
+        return Ok(UInt32Array::from(nulls));
+    }
+    // no need to sort nulls as they are in the correct order already
+    valid_indices.append(&mut nulls);
+
+    Ok(UInt32Array::from(valid_indices))
+}
+
+/// Sort strings
+fn sort_string(
+    values: &ArrayRef,
+    value_indices: Vec<usize>,
+    null_indices: Vec<u32>,
+    options: &SortOptions,
+) -> Result<UInt32Array> {
+    let values = values
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .expect("Unable to downcast to string array");
+    let mut valids = value_indices
+        .into_iter()
+        .map(|index| (index as u32, values.value(index)))
+        .collect::<Vec<(u32, &str)>>();
+    let mut nulls = null_indices;
+    if !options.descending {
+        valids.sort_by_key(|a| a.1);
+    } else {
+        valids.sort_by_key(|a| Reverse(a.1));
+        nulls.reverse();
+    }
+    // collect the order of valid tuplies
+    let mut valid_indices: Vec<u32> = valids.iter().map(|tuple| tuple.0).collect();
+
+    if options.nulls_first {
+        nulls.append(&mut valid_indices);
+        return Ok(UInt32Array::from(nulls));
+    }
+
+    // no need to sort nulls as they are in the correct order already
+    valid_indices.append(&mut nulls);
+
+    Ok(UInt32Array::from(valid_indices))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::{convert::TryFrom, sync::Arc};
+
+    fn test_sort_to_indices_primitive_arrays<T>(
+        data: Vec<Option<T::Native>>,
+        options: Option<SortOptions>,
+        expected_data: Vec<u32>,
+    ) where
+        T: ArrowPrimitiveType,
+        PrimitiveArray<T>: From<Vec<Option<T::Native>>> + ArrayEqual,
+    {
+        let output = PrimitiveArray::<T>::from(data);
+        let expected = UInt32Array::from(expected_data);
+        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options).unwrap();
+        assert!(output.equals(&expected))
+    }
+
+    fn test_sort_primitive_arrays<T>(
+        data: Vec<Option<T::Native>>,
+        options: Option<SortOptions>,
+        expected_data: Vec<Option<T::Native>>,
+    ) where
+        T: ArrowPrimitiveType,
+        PrimitiveArray<T>: From<Vec<Option<T::Native>>> + ArrayEqual,
+    {
+        let output = PrimitiveArray::<T>::from(data);
+        let expected = PrimitiveArray::<T>::from(expected_data);
+        let output = sort(&(Arc::new(output) as ArrayRef), options).unwrap();
+        let output = output.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
+        assert!(output.equals(&expected))
+    }
+
+    fn test_sort_to_indices_string_arrays(
+        data: Vec<Option<&str>>,
+        options: Option<SortOptions>,
+        expected_data: Vec<u32>,
+    ) {
+        let output = StringArray::try_from(data).expect("Unable to create string array");
+        let expected = UInt32Array::from(expected_data);
+        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options).unwrap();
+        assert!(output.equals(&expected))
+    }
+
+    fn test_sort_string_arrays(
+        data: Vec<Option<&str>>,
+        options: Option<SortOptions>,
+        expected_data: Vec<Option<&str>>,
+    ) {
+        let output = StringArray::try_from(data).expect("Unable to create string array");
+        let expected =
+            StringArray::try_from(expected_data).expect("Unable to create string array");
+        let output = sort(&(Arc::new(output) as ArrayRef), options).unwrap();
+        let output = output.as_any().downcast_ref::<StringArray>().unwrap();
+        assert!(output.equals(&expected))
+    }
+
+    #[test]
+    fn test_sort_to_indices_primitives() {
+        test_sort_to_indices_primitive_arrays::<Int8Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            None,
+            vec![3, 1, 4, 2, 0, 5],
+        );
+        test_sort_to_indices_primitive_arrays::<Int16Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            None,
+            vec![3, 1, 4, 2, 0, 5],
+        );
+        test_sort_to_indices_primitive_arrays::<Int32Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            None,
+            vec![3, 1, 4, 2, 0, 5],
+        );
+        test_sort_to_indices_primitive_arrays::<Int64Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            None,
+            vec![3, 1, 4, 2, 0, 5],
+        );
+
+        // descending
+        test_sort_to_indices_primitive_arrays::<Int8Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: false,
+            }),
+            vec![2, 1, 4, 3, 5, 0], // [2, 4, 1, 3, 5, 0]
+        );
+
+        test_sort_to_indices_primitive_arrays::<Int16Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: false,
+            }),
+            vec![2, 1, 4, 3, 5, 0],
+        );
+
+        test_sort_to_indices_primitive_arrays::<Int32Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: false,
+            }),
+            vec![2, 1, 4, 3, 5, 0],
+        );
+
+        test_sort_to_indices_primitive_arrays::<Int64Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: false,
+            }),
+            vec![2, 1, 4, 3, 5, 0],
+        );
+
+        // descending, nulls first
+        test_sort_to_indices_primitive_arrays::<Int8Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            vec![5, 0, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
+        );
+
+        test_sort_to_indices_primitive_arrays::<Int16Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            vec![5, 0, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
+        );
+
+        test_sort_to_indices_primitive_arrays::<Int32Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            vec![5, 0, 2, 1, 4, 3],
+        );
+
+        test_sort_to_indices_primitive_arrays::<Int64Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            vec![5, 0, 2, 1, 4, 3],
+        );
+
+        // boolean
+        test_sort_to_indices_primitive_arrays::<BooleanType>(
+            vec![None, Some(false), Some(true), Some(true), Some(false), None],
+            None,
+            vec![1, 4, 2, 3, 0, 5],
+        );
+
+        // boolean, descending
+        test_sort_to_indices_primitive_arrays::<BooleanType>(
+            vec![None, Some(false), Some(true), Some(true), Some(false), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: false,
+            }),
+            vec![2, 3, 1, 4, 5, 0],
+        );
+
+        // boolean, descending, nulls first
+        test_sort_to_indices_primitive_arrays::<BooleanType>(
+            vec![None, Some(false), Some(true), Some(true), Some(false), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            vec![5, 0, 2, 3, 1, 4],
+        );
+    }
+
+    #[test]
+    fn test_sort_primitives() {
+        // default case
+        test_sort_primitive_arrays::<UInt8Type>(
+            vec![None, Some(3), Some(5), Some(2), Some(3), None],
+            None,
+            vec![Some(2), Some(3), Some(3), Some(5), None, None],
+        );
+        test_sort_primitive_arrays::<UInt16Type>(
+            vec![None, Some(3), Some(5), Some(2), Some(3), None],
+            None,
+            vec![Some(2), Some(3), Some(3), Some(5), None, None],
+        );
+        test_sort_primitive_arrays::<UInt32Type>(
+            vec![None, Some(3), Some(5), Some(2), Some(3), None],
+            None,
+            vec![Some(2), Some(3), Some(3), Some(5), None, None],
+        );
+        test_sort_primitive_arrays::<UInt64Type>(
+            vec![None, Some(3), Some(5), Some(2), Some(3), None],
+            None,
+            vec![Some(2), Some(3), Some(3), Some(5), None, None],
+        );
+
+        // descending
+        test_sort_primitive_arrays::<Int8Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: false,
+            }),
+            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
+        );
+        test_sort_primitive_arrays::<Int16Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: false,
+            }),
+            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
+        );
+        test_sort_primitive_arrays::<Int32Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: false,
+            }),
+            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
+        );
+        test_sort_primitive_arrays::<Int16Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: false,
+            }),
+            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
+        );
+
+        // descending, nulls first
+        test_sort_primitive_arrays::<Int8Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
+        );
+        test_sort_primitive_arrays::<Int16Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
+        );
+        test_sort_primitive_arrays::<Int32Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
+        );
+        test_sort_primitive_arrays::<Int64Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
+        );
+
+        // int8 nulls first
+        test_sort_primitive_arrays::<Int8Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: false,
+                nulls_first: true,
+            }),
+            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
+        );
+        test_sort_primitive_arrays::<Int16Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: false,
+                nulls_first: true,
+            }),
+            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
+        );
+        test_sort_primitive_arrays::<Int32Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: false,
+                nulls_first: true,
+            }),
+            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
+        );
+        test_sort_primitive_arrays::<Int64Type>(
+            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
+            Some(SortOptions {
+                descending: false,
+                nulls_first: true,
+            }),
+            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
+        );
+    }
+
+    #[test]
+    fn test_sort_to_indices_strings() {
+        test_sort_to_indices_string_arrays(
+            vec![
+                None,
+                Some("bad"),
+                Some("sad"),
+                None,
+                Some("glad"),
+                Some("-ad"),
+            ],
+            None,
+            vec![5, 1, 4, 2, 0, 3],
+        );
+
+        test_sort_to_indices_string_arrays(
+            vec![
+                None,
+                Some("bad"),
+                Some("sad"),
+                None,
+                Some("glad"),
+                Some("-ad"),
+            ],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: false,
+            }),
+            vec![2, 4, 1, 5, 3, 0],
+        );
+
+        test_sort_to_indices_string_arrays(
+            vec![
+                None,
+                Some("bad"),
+                Some("sad"),
+                None,
+                Some("glad"),
+                Some("-ad"),
+            ],
+            Some(SortOptions {
+                descending: false,
+                nulls_first: true,
+            }),
+            vec![0, 3, 5, 1, 4, 2],
+        );
+
+        test_sort_to_indices_string_arrays(
+            vec![
+                None,
+                Some("bad"),
+                Some("sad"),
+                None,
+                Some("glad"),
+                Some("-ad"),
+            ],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            vec![3, 0, 2, 4, 1, 5],
+        );
+    }
+
+    #[test]
+    fn test_sort_strings() {
+        test_sort_string_arrays(
+            vec![
+                None,
+                Some("bad"),
+                Some("sad"),
+                None,
+                Some("glad"),
+                Some("-ad"),
+            ],
+            None,
+            vec![
+                Some("-ad"),
+                Some("bad"),
+                Some("glad"),
+                Some("sad"),
+                None,
+                None,
+            ],
+        );
+
+        test_sort_string_arrays(
+            vec![
+                None,
+                Some("bad"),
+                Some("sad"),
+                None,
+                Some("glad"),
+                Some("-ad"),
+            ],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: false,
+            }),
+            vec![
+                Some("sad"),
+                Some("glad"),
+                Some("bad"),
+                Some("-ad"),
+                None,
+                None,
+            ],
+        );
+
+        test_sort_string_arrays(
+            vec![
+                None,
+                Some("bad"),
+                Some("sad"),
+                None,
+                Some("glad"),
+                Some("-ad"),
+            ],
+            Some(SortOptions {
+                descending: false,
+                nulls_first: true,
+            }),
+            vec![
+                None,
+                None,
+                Some("-ad"),
+                Some("bad"),
+                Some("glad"),
+                Some("sad"),
+            ],
+        );
+
+        test_sort_string_arrays(
+            vec![
+                None,
+                Some("bad"),
+                Some("sad"),
+                None,
+                Some("glad"),
+                Some("-ad"),
+            ],
+            Some(SortOptions {
+                descending: true,
+                nulls_first: true,
+            }),
+            vec![
+                None,
+                None,
+                Some("sad"),
+                Some("glad"),
+                Some("bad"),
+                Some("-ad"),
+            ],
+        );
+    }
+}
diff --git a/rust/parquet/src/record/api.rs b/rust/parquet/src/record/api.rs
index f2aa0e1..913f0ab 100644
--- a/rust/parquet/src/record/api.rs
+++ b/rust/parquet/src/record/api.rs
@@ -126,29 +126,35 @@ pub trait RowFormatter {
 /// Macro to generate type-safe get_xxx methods for primitive types,
 /// e.g. `get_bool`, `get_short`.
 macro_rules! row_primitive_accessor {
-  ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
-    fn $METHOD(&self, i: usize) -> Result<$TY> {
-      match self.fields[i].1 {
-        Field::$VARIANT(v) => Ok(v),
-        _ => Err(general_err!("Cannot access {} as {}",
-          self.fields[i].1.get_type_name(), stringify!($VARIANT)))
-      }
-    }
-  }
+    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
+        fn $METHOD(&self, i: usize) -> Result<$TY> {
+            match self.fields[i].1 {
+                Field::$VARIANT(v) => Ok(v),
+                _ => Err(general_err!(
+                    "Cannot access {} as {}",
+                    self.fields[i].1.get_type_name(),
+                    stringify!($VARIANT)
+                )),
+            }
+        }
+    };
 }
 
 /// Macro to generate type-safe get_xxx methods for reference types,
 /// e.g. `get_list`, `get_map`.
 macro_rules! row_complex_accessor {
-  ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
-    fn $METHOD(&self, i: usize) -> Result<&$TY> {
-      match self.fields[i].1 {
-        Field::$VARIANT(ref v) => Ok(v),
-        _ => Err(general_err!("Cannot access {} as {}",
-          self.fields[i].1.get_type_name(), stringify!($VARIANT)))
-      }
-    }
-  }
+    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
+        fn $METHOD(&self, i: usize) -> Result<&$TY> {
+            match self.fields[i].1 {
+                Field::$VARIANT(ref v) => Ok(v),
+                _ => Err(general_err!(
+                    "Cannot access {} as {}",
+                    self.fields[i].1.get_type_name(),
+                    stringify!($VARIANT)
+                )),
+            }
+        }
+    };
 }
 
 impl RowFormatter for Row {
@@ -265,33 +271,35 @@ pub trait ListAccessor {
 /// Macro to generate type-safe get_xxx methods for primitive types,
 /// e.g. get_bool, get_short
 macro_rules! list_primitive_accessor {
-  ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
-    fn $METHOD(&self, i: usize) -> Result<$TY> {
-      match self.elements[i] {
-        Field::$VARIANT(v) => Ok(v),
-        _ => Err(general_err!(
-          "Cannot access {} as {}",
-          self.elements[i].get_type_name(), stringify!($VARIANT))
-        )
-      }
-    }
-  }
+    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
+        fn $METHOD(&self, i: usize) -> Result<$TY> {
+            match self.elements[i] {
+                Field::$VARIANT(v) => Ok(v),
+                _ => Err(general_err!(
+                    "Cannot access {} as {}",
+                    self.elements[i].get_type_name(),
+                    stringify!($VARIANT)
+                )),
+            }
+        }
+    };
 }
 
 /// Macro to generate type-safe get_xxx methods for reference types
 /// e.g. get_list, get_map
 macro_rules! list_complex_accessor {
-  ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
-    fn $METHOD(&self, i: usize) -> Result<&$TY> {
-      match self.elements[i] {
-        Field::$VARIANT(ref v) => Ok(v),
-        _ => Err(general_err!(
-          "Cannot access {} as {}",
-          self.elements[i].get_type_name(), stringify!($VARIANT))
-        )
-      }
-    }
-  }
+    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
+        fn $METHOD(&self, i: usize) -> Result<&$TY> {
+            match self.elements[i] {
+                Field::$VARIANT(ref v) => Ok(v),
+                _ => Err(general_err!(
+                    "Cannot access {} as {}",
+                    self.elements[i].get_type_name(),
+                    stringify!($VARIANT)
+                )),
+            }
+        }
+    };
 }
 
 impl ListAccessor for List {
@@ -366,17 +374,18 @@ struct MapList<'a> {
 /// Macro to generate type-safe get_xxx methods for primitive types,
 /// e.g. get_bool, get_short
 macro_rules! map_list_primitive_accessor {
-  ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
-    fn $METHOD(&self, i: usize) -> Result<$TY> {
-      match self.elements[i] {
-        Field::$VARIANT(v) => Ok(*v),
-        _ => Err(general_err!(
-          "Cannot access {} as {}",
-          self.elements[i].get_type_name(), stringify!($VARIANT))
-        )
-      }
-    }
-  }
+    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
+        fn $METHOD(&self, i: usize) -> Result<$TY> {
+            match self.elements[i] {
+                Field::$VARIANT(v) => Ok(*v),
+                _ => Err(general_err!(
+                    "Cannot access {} as {}",
+                    self.elements[i].get_type_name(),
+                    stringify!($VARIANT)
+                )),
+            }
+        }
+    };
 }
 
 impl<'a> ListAccessor for MapList<'a> {