You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/07/11 22:47:33 UTC

[GitHub] [arrow] westonpace commented on a diff in pull request #13334: ARROW-14314: [C++] Sorting dictionary array not implemented

westonpace commented on code in PR #13334:
URL: https://github.com/apache/arrow/pull/13334#discussion_r918401961


##########
cpp/src/arrow/compute/kernels/vector_array_sort.cc:
##########
@@ -318,9 +393,10 @@ class ArrayCountOrCompareSorter {
   using c_type = typename ArrowType::c_type;
 
  public:
-  NullPartitionResult operator()(uint64_t* indices_begin, uint64_t* indices_end,
-                                 const Array& array, int64_t offset,
-                                 const ArraySortOptions& options) {
+  // `offset` is used when this is called on a chunk of a chunked array

Review Comment:
   Did you mean to copy this comment around?



##########
cpp/src/arrow/compute/kernels/vector_array_sort.cc:
##########
@@ -173,6 +175,78 @@ class ArrayCompareSorter {
   }
 };
 
+template <>
+class ArrayCompareSorter<DictionaryType> {
+  struct DictionaryInternal {
+    NullPartitionResult* p;
+    const std::shared_ptr<Array>& values;
+    const std::shared_ptr<Array>& indices;
+    const UInt64Array& values_rank;
+    const ArraySortOptions& options;
+    int64_t offset;
+
+    Status Visit(const DataType& index_type) {
+      return Status::TypeError("Dictionary sorting not supported for index type ",
+                               index_type.ToString());
+    }
+
+    template <typename IndexType>
+    enable_if_t<is_integer_type<IndexType>::value, Status> Visit(
+        const IndexType& index_type) {
+      return SortInternal<IndexType>();
+    }
+
+    template <typename IndexType>
+    Status SortInternal() {
+      using ArrayType = typename TypeTraits<IndexType>::ArrayType;
+      using GetView = GetViewType<IndexType>;
+      const auto& indices_array = checked_cast<const ArrayType&>(*indices);
+
+      std::stable_sort(this->p->non_nulls_begin, this->p->non_nulls_end,
+                       [&](uint64_t left, uint64_t right) {
+                         const auto lhs_idx =
+                             GetView::LogicalValue(indices_array.GetView(left - offset));
+                         const auto rhs_idx =
+                             GetView::LogicalValue(indices_array.GetView(right - offset));
+                         const auto lhs = GetViewType<UInt64Type>::LogicalValue(
+                             values_rank.GetView(lhs_idx - offset));
+                         const auto rhs = GetViewType<UInt64Type>::LogicalValue(
+                             values_rank.GetView(rhs_idx - offset));
+                         return lhs < rhs;
+                       });
+
+      return Status::OK();
+    }
+
+    Status Make(const std::shared_ptr<DataType>& index_type) {
+      return VisitTypeInline(*index_type, this);

Review Comment:
   Allowing the sort function to return a result caused a lot of change in this PR.  I think the only way a non-ok result could be encountered is if a dictionary had a non-integral index type (Dictionary sorting not supported for index type).  I do not think this is currently possible.  Would it be better to do something like this?
   
   ```
   void Make(const std::shared_ptr<DataType>& index_type) {
     ABORT_NOT_OK(VisitTypeInline(*index_type, this));
   }
   ```



##########
cpp/src/arrow/compute/kernels/vector_array_sort.cc:
##########
@@ -17,6 +17,7 @@
 
 #include <algorithm>
 #include <cmath>
+#include <iostream>

Review Comment:
   ```suggestion
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org