You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/07/04 19:12:25 UTC

[GitHub] [arrow] ArianaVillegas commented on a diff in pull request #13334: ARROW-14314: [C++] Sorting dictionary array not implemented

ArianaVillegas commented on code in PR #13334:
URL: https://github.com/apache/arrow/pull/13334#discussion_r913235196


##########
cpp/src/arrow/compute/kernels/vector_array_sort.cc:
##########
@@ -172,6 +173,85 @@ class ArrayCompareSorter {
   }
 };
 
+template <>
+class ArrayCompareSorter<DictionaryType> {
+  struct DictionaryInternal {
+    NullPartitionResult p;
+    const std::shared_ptr<Array>& values;
+    const std::shared_ptr<Array>& indices;
+    const UInt64Array& indices_values;
+    const ArraySortOptions& options;
+    int64_t offset;
+
+    Status Visit(const DataType& index_type) {
+      return Status::TypeError("Dictionary sorting not supported for index type ",
+                               index_type.ToString());
+    }
+
+    template <typename IndexType>
+    enable_if_t<is_integer_type<IndexType>::value, Status> Visit(
+        const IndexType& index_type) {
+      return SortInternal<IndexType>();
+    }
+
+    template <typename IndexType>
+    Status SortInternal() {
+      using ArrayType = typename TypeTraits<IndexType>::ArrayType;
+      using GetView = GetViewType<IndexType>;
+      const auto& indices_array = checked_cast<const ArrayType&>(*indices);
+
+      std::vector<uint64_t> sort_order(indices_values.length());
+      uint64_t cur = 0;
+      auto cur_idx = GetViewType<UInt64Type>::LogicalValue(indices_values.GetView(cur));
+      auto cur_val = values->GetScalar(cur_idx);
+      for (int i = 0; i < indices_values.length(); i++) {
+        auto tmp_idx = GetViewType<UInt64Type>::LogicalValue(indices_values.GetView(i));
+        auto tmp_val = values->GetScalar(tmp_idx);
+        if (cur_val != tmp_val) {
+          cur = i;
+          cur_val = tmp_val;
+        }
+        sort_order[tmp_idx] = cur;
+      }
+
+      std::stable_sort(
+          this->p.non_nulls_begin, this->p.non_nulls_end,
+          [&](uint64_t left, uint64_t right) {
+            const auto lhs = GetView::LogicalValue(indices_array.GetView(left - offset));
+            const auto rhs = GetView::LogicalValue(indices_array.GetView(right - offset));
+            return sort_order[lhs] < sort_order[rhs];
+          });
+
+      return Status::OK();
+    }
+
+    Result<NullPartitionResult> Make(const std::shared_ptr<DataType>& index_type) {
+      RETURN_NOT_OK(VisitTypeInline(*index_type, this));
+      return std::move(p);
+    }
+  };
+
+ public:
+  NullPartitionResult operator()(uint64_t* indices_begin, uint64_t* indices_end,
+                                 const Array& array, int64_t offset,
+                                 const ArraySortOptions& options) {
+    const auto& dict_array = checked_cast<const DictionaryArray&>(array);
+    const auto& values = dict_array.dictionary();
+    const auto& indices = dict_array.indices();
+    const auto& index_type = dict_array.dict_type()->index_type();
+
+    NullPartitionResult p = PartitionNulls<DictionaryArray, StablePartitioner>(
+        indices_begin, indices_end, dict_array, offset, options.null_placement);
+    auto indices_array =
+        CallFunction("array_sort_indices", {values}, &options).ValueOrDie().make_array();
+
+    const auto& indices_values = checked_cast<const UInt64Array&>(*indices_array);

Review Comment:
   I think this should work because it is sorting values array, and as far as I know the output array is of UInt64 type. Probably I should change `indices_array` to `values_array` to avoid confusion.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org