You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/09/02 12:28:44 UTC

[GitHub] [arrow] jhorstmann commented on a change in pull request #8092: ARROW-9895: [Rust] Improve sorting kernels

jhorstmann commented on a change in pull request #8092:
URL: https://github.com/apache/arrow/pull/8092#discussion_r482029030



##########
File path: rust/arrow/src/array/ord.rs
##########
@@ -68,65 +80,252 @@ impl OrdArray for NullArray {
     }
 }
 
+macro_rules! float_ord_cmp {
+    ($NAME: ident, $T: ty) => {
+        #[inline]
+        fn $NAME(a: $T, b: $T) -> Ordering {
+            if a < b {
+                return Ordering::Less;
+            }
+            if a > b {
+                return Ordering::Greater;
+            }
+
+            // convert to bits with canonical pattern for NaN
+            let a = if a.is_nan() {
+                <$T>::NAN.to_bits()
+            } else {
+                a.to_bits()
+            };
+            let b = if b.is_nan() {
+                <$T>::NAN.to_bits()
+            } else {
+                b.to_bits()
+            };
+
+            if a == b {
+                // Equal or both NaN
+                Ordering::Equal
+            } else if a < b {
+                // (-0.0, 0.0) or (!NaN, NaN)
+                Ordering::Less
+            } else {
+                // (0.0, -0.0) or (NaN, !NaN)
+                Ordering::Greater
+            }
+        }
+    };
+}
+
+float_ord_cmp!(cmp_f64, f64);
+float_ord_cmp!(cmp_f32, f32);
+
+#[repr(transparent)]
+struct Float64ArrayAsOrdArray<'a>(&'a Float64Array);
+#[repr(transparent)]
+struct Float32ArrayAsOrdArray<'a>(&'a Float32Array);
+
+impl OrdArray for Float64ArrayAsOrdArray<'_> {
+    fn cmp_value(&self, i: usize, j: usize) -> Ordering {
+        let a: f64 = self.0.value(i);
+        let b: f64 = self.0.value(j);
+
+        cmp_f64(a, b)
+    }
+}
+
+impl OrdArray for Float32ArrayAsOrdArray<'_> {
+    fn cmp_value(&self, i: usize, j: usize) -> Ordering {
+        let a: f32 = self.0.value(i);
+        let b: f32 = self.0.value(j);
+
+        cmp_f32(a, b)
+    }
+}
+
+fn float32_as_ord_array<'a>(array: &'a ArrayRef) -> Box<dyn OrdArray + 'a> {
+    let float_array: &Float32Array = as_primitive_array::<Float32Type>(array);
+    //let clone = std::mem::ManuallyDrop::new(float_vec);
+
+    //let as_ord = unsafe { &*(float_array as *const Float32Array as *const Float32ArrayAsOrdArray) };
+
+    //println!("before transmute");
+    //let as_ord: &Float32ArrayAsOrdArray = unsafe { std::mem::transmute_copy(array) };
+    //println!("after transmute");
+    //let as_ord = Float32ArrayAsOrdArray(float_array);
+    //as_ord
+    Box::new(Float32ArrayAsOrdArray(float_array))
+}
+
+fn float64_as_ord_array<'a>(array: &'a ArrayRef) -> Box<dyn OrdArray + 'a> {
+    let float_array: &Float64Array = as_primitive_array::<Float64Type>(array);
+
+    //println!("before transmute");
+    //let as_ord: &Float64ArrayAsOrdArray = unsafe { std::mem::transmute_copy(array) };
+    //let as_ord = unsafe { &*(float_array as *const Float64Array as *const Float64ArrayAsOrdArray) };
+    //let as_ord = unsafe { &*(array as *const ArrayRef as *const Float64ArrayAsOrdArray )};
+    //println!("after transmute");
+    //let as_ord = Float64ArrayAsOrdArray(float_array);
+    //as_ord
+    Box::new(Float64ArrayAsOrdArray(float_array))
+}
+
+struct StringDictionaryArrayAsOrdArray<'a, T: ArrowDictionaryKeyType> {
+    dict_array: &'a DictionaryArray<T>,
+    keys: PrimitiveArray<T>,
+}
+
+impl<T: ArrowDictionaryKeyType> OrdArray for StringDictionaryArrayAsOrdArray<'_, T> {
+    fn cmp_value(&self, i: usize, j: usize) -> Ordering {
+        let keys = &self.keys;
+
+        let a: T::Native = keys.value(i);
+        let b: T::Native = keys.value(j);
+
+        let values = self.dict_array.values();
+        let dict = as_string_array(&values);
+
+        let sa = dict.value(a.to_usize().unwrap());
+        let sb = dict.value(b.to_usize().unwrap());
+
+        sa.cmp(sb)
+    }
+}
+
+struct SortedStringDictionaryArrayAsOrdArray<'a, T: ArrowDictionaryKeyType> {

Review comment:
       I'm also open to leaving this implementation for sorted dictionary arrays out for now. Although the `is_ordered` flag already existed before this PR, there is currently no way to set it or to guarantee a sorted dictionary when reading from CSV or Parquet.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org