You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/06/24 08:23:20 UTC

[GitHub] [arrow] pitrou commented on a change in pull request #6167: PARQUET-1766: [C++] Handle parquet::Statistics NaNs and -0.0f as per upstream parquet-mr

pitrou commented on a change in pull request #6167:
URL: https://github.com/apache/arrow/pull/6167#discussion_r657734632



##########
File path: cpp/src/parquet/statistics.cc
##########
@@ -42,239 +47,325 @@ namespace parquet {
 
 template <typename DType, bool is_signed>
 struct CompareHelper {
-  typedef typename DType::c_type T;
-  static inline bool Compare(int type_length, const T& a, const T& b) { return a < b; }
-};
+  using T = typename DType::c_type;
 
-template <>
-struct CompareHelper<Int96Type, true> {
-  static inline bool Compare(int type_length, const Int96& a, const Int96& b) {
-    // Only the MSB bit is by Signed comparison
-    // For little-endian, this is the last bit of Int96 type
-    const int32_t amsb = static_cast<const int32_t>(a.value[2]);
-    const int32_t bmsb = static_cast<const int32_t>(b.value[2]);
-    if (amsb != bmsb) {
-      return (amsb < bmsb);
-    } else if (a.value[1] != b.value[1]) {
-      return (a.value[1] < b.value[1]);
-    }
-    return (a.value[0] < b.value[0]);
+  constexpr static T DefaultMin() { return std::numeric_limits<T>::max(); }
+  constexpr static T DefaultMax() { return std::numeric_limits<T>::lowest(); }
+
+  // MSVC17 fix, isnan is not overloaded for IntegralType as per C++11
+  // standard requirements.
+  template <typename T1 = T>
+  static arrow::enable_if_t<std::is_floating_point<T1>::value, T> Coalesce(T val,
+                                                                           T fallback) {
+    return std::isnan(val) ? fallback : val;
   }
-};
 
-template <>
-struct CompareHelper<ByteArrayType, true> {
-  static inline bool Compare(int type_length, const ByteArray& a, const ByteArray& b) {
-    const int8_t* aptr = reinterpret_cast<const int8_t*>(a.ptr);
-    const int8_t* bptr = reinterpret_cast<const int8_t*>(b.ptr);
-    return std::lexicographical_compare(aptr, aptr + a.len, bptr, bptr + b.len);
+  template <typename T1 = T>
+  static arrow::enable_if_t<!std::is_floating_point<T1>::value, T> Coalesce(T val,
+                                                                            T fallback) {
+    return val;
   }
+
+  static inline bool Compare(int type_length, const T& a, const T& b) { return a < b; }
+
+  static T Min(int type_length, T a, T b) { return a < b ? a : b; }
+  static T Max(int type_length, T a, T b) { return a < b ? b : a; }
 };
 
-template <>
-struct CompareHelper<FLBAType, true> {
-  static inline bool Compare(int type_length, const FLBA& a, const FLBA& b) {
-    const int8_t* aptr = reinterpret_cast<const int8_t*>(a.ptr);
-    const int8_t* bptr = reinterpret_cast<const int8_t*>(b.ptr);
-    return std::lexicographical_compare(aptr, aptr + type_length, bptr,
-                                        bptr + type_length);
+template <typename DType>
+struct UnsignedCompareHelperBase {
+  using T = typename DType::c_type;
+  using UCType = typename std::make_unsigned<T>::type;
+
+  constexpr static T DefaultMin() { return std::numeric_limits<UCType>::max(); }
+  constexpr static T DefaultMax() { return std::numeric_limits<UCType>::lowest(); }
+  static T Coalesce(T val, T fallback) { return val; }
+
+  static inline bool Compare(int type_length, T a, T b) {
+    return arrow::util::SafeCopy<UCType>(a) < arrow::util::SafeCopy<UCType>(b);

Review comment:
       @fsaintjacques Do you remember why `SafeCopy` was required here instead of a more mundane `static_cast`?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org