You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2022/06/01 13:21:43 UTC

[arrow] branch master updated: ARROW-16234: [C++] Vector Kernel for Rank (#12963)

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2dc64563bc ARROW-16234: [C++] Vector Kernel for Rank (#12963)
2dc64563bc is described below

commit 2dc64563bcd0d9774fd69d271035940653203126
Author: William Ayd <wi...@icloud.com>
AuthorDate: Wed Jun 1 06:21:33 2022 -0700

    ARROW-16234: [C++] Vector Kernel for Rank (#12963)
    
    Lead-authored-by: Will Ayd <wi...@icloud.com>
    Co-authored-by: William Ayd <wi...@icloud.com>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 cpp/src/arrow/compute/api_vector.cc               |  33 ++++
 cpp/src/arrow/compute/api_vector.h                |  37 ++++
 cpp/src/arrow/compute/kernels/vector_sort.cc      | 216 ++++++++++++++++++++
 cpp/src/arrow/compute/kernels/vector_sort_test.cc | 227 ++++++++++++++++++++++
 docs/source/cpp/compute.rst                       |  13 +-
 5 files changed, 522 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc
index e3db022536..ad4248fc6c 100644
--- a/cpp/src/arrow/compute/api_vector.cc
+++ b/cpp/src/arrow/compute/api_vector.cc
@@ -43,6 +43,7 @@ namespace internal {
 using compute::DictionaryEncodeOptions;
 using compute::FilterOptions;
 using compute::NullPlacement;
+using compute::RankOptions;
 
 template <>
 struct EnumTraits<FilterOptions::NullSelectionBehavior>
@@ -88,6 +89,25 @@ struct EnumTraits<NullPlacement>
     return "<INVALID>";
   }
 };
+template <>
+struct EnumTraits<RankOptions::Tiebreaker>
+    : BasicEnumTraits<RankOptions::Tiebreaker, RankOptions::Min, RankOptions::Max,
+                      RankOptions::First, RankOptions::Dense> {
+  static std::string name() { return "Tiebreaker"; }
+  static std::string value_name(RankOptions::Tiebreaker value) {
+    switch (value) {
+      case RankOptions::Min:
+        return "Min";
+      case RankOptions::Max:
+        return "Max";
+      case RankOptions::First:
+        return "First";
+      case RankOptions::Dense:
+        return "Dense";
+    }
+    return "<INVALID>";
+  }
+};
 
 }  // namespace internal
 
@@ -139,6 +159,10 @@ static auto kCumulativeSumOptionsType = GetFunctionOptionsType<CumulativeSumOpti
     DataMember("start", &CumulativeSumOptions::start),
     DataMember("skip_nulls", &CumulativeSumOptions::skip_nulls),
     DataMember("check_overflow", &CumulativeSumOptions::check_overflow));
+static auto kRankOptionsType = GetFunctionOptionsType<RankOptions>(
+    DataMember("sort_keys", &RankOptions::sort_keys),
+    DataMember("null_placement", &RankOptions::null_placement),
+    DataMember("tiebreaker", &RankOptions::tiebreaker));
 }  // namespace
 }  // namespace internal
 
@@ -192,6 +216,14 @@ CumulativeSumOptions::CumulativeSumOptions(std::shared_ptr<Scalar> start, bool s
       check_overflow(check_overflow) {}
 constexpr char CumulativeSumOptions::kTypeName[];
 
+RankOptions::RankOptions(std::vector<SortKey> sort_keys, NullPlacement null_placement,
+                         RankOptions::Tiebreaker tiebreaker)
+    : FunctionOptions(internal::kRankOptionsType),
+      sort_keys(std::move(sort_keys)),
+      null_placement(null_placement),
+      tiebreaker(tiebreaker) {}
+constexpr char RankOptions::kTypeName[];
+
 namespace internal {
 void RegisterVectorOptions(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunctionOptionsType(kFilterOptionsType));
@@ -202,6 +234,7 @@ void RegisterVectorOptions(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunctionOptionsType(kPartitionNthOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kSelectKOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeSumOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kRankOptionsType));
 }
 }  // namespace internal
 
diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h
index b5daddb17b..1f4581566f 100644
--- a/cpp/src/arrow/compute/api_vector.h
+++ b/cpp/src/arrow/compute/api_vector.h
@@ -174,6 +174,43 @@ class ARROW_EXPORT SelectKOptions : public FunctionOptions {
   std::vector<SortKey> sort_keys;
 };
 
+/// \brief Rank options
+class ARROW_EXPORT RankOptions : public FunctionOptions {
+ public:
+  /// Configure how ties between equal values are handled
+  enum Tiebreaker {
+    /// Ties get the smallest possible rank in sorted order.
+    Min,
+    /// Ties get the largest possible rank in sorted order.
+    Max,
+    /// Ranks are assigned in order of when ties appear in the input.
+    /// This ensures the ranks are a stable permutation of the input.
+    First,
+    /// The ranks span a dense [1, M] interval where M is the number
+    /// of distinct values in the input.
+    Dense
+  };
+
+  explicit RankOptions(std::vector<SortKey> sort_keys = {},
+                       NullPlacement null_placement = NullPlacement::AtEnd,
+                       Tiebreaker tiebreaker = RankOptions::First);
+  /// Convenience constructor for array inputs
+  explicit RankOptions(SortOrder order,
+                       NullPlacement null_placement = NullPlacement::AtEnd,
+                       Tiebreaker tiebreaker = RankOptions::First)
+      : RankOptions({SortKey("", order)}, null_placement, tiebreaker) {}
+
+  static constexpr char const kTypeName[] = "RankOptions";
+  static RankOptions Defaults() { return RankOptions(); }
+
+  /// Column key(s) to order by and how to order by these sort keys.
+  std::vector<SortKey> sort_keys;
+  /// Whether nulls and NaNs are placed at the start or at the end
+  NullPlacement null_placement;
+  /// Tiebreaker for dealing with equal values in ranks
+  Tiebreaker tiebreaker;
+};
+
 /// \brief Partitioning options for NthToIndices
 class ARROW_EXPORT PartitionNthOptions : public FunctionOptions {
  public:
diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc
index 8a108621d3..93ed8a7860 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort.cc
@@ -1905,6 +1905,221 @@ class SelectKUnstableMetaFunction : public MetaFunction {
   }
 };
 
+// ----------------------------------------------------------------------
+// Rank implementation
+
+const RankOptions* GetDefaultRankOptions() {
+  static const auto kDefaultRankOptions = RankOptions::Defaults();
+  return &kDefaultRankOptions;
+}
+
+class ArrayRanker : public TypeVisitor {
+ public:
+  ArrayRanker(ExecContext* ctx, const Array& array, const RankOptions& options,
+              Datum* output)
+      : TypeVisitor(),
+        ctx_(ctx),
+        array_(array),
+        options_(options),
+        null_placement_(options.null_placement),
+        tiebreaker_(options.tiebreaker),
+        physical_type_(GetPhysicalType(array.type())),
+        output_(output) {}
+
+  Status Run() { return physical_type_->Accept(this); }
+
+#define VISIT(TYPE) \
+  Status Visit(const TYPE& type) { return RankInternal<TYPE>(); }
+
+  VISIT_SORTABLE_PHYSICAL_TYPES(VISIT)
+
+#undef VISIT
+
+  template <typename InType>
+  Status RankInternal() {
+    using GetView = GetViewType<InType>;
+    using T = typename GetViewType<InType>::T;
+    using ArrayType = typename TypeTraits<InType>::ArrayType;
+
+    ArrayType arr(array_.data());
+
+    SortOrder order = SortOrder::Ascending;
+    if (!options_.sort_keys.empty()) {
+      order = options_.sort_keys[0].order;
+    }
+    ArraySortOptions array_options(order, null_placement_);
+
+    auto length = array_.length();
+    ARROW_ASSIGN_OR_RAISE(auto sort_indices,
+                          MakeMutableUInt64Array(uint64(), length, ctx_->memory_pool()));
+    auto sort_begin = sort_indices->GetMutableValues<uint64_t>(1);
+    auto sort_end = sort_begin + length;
+    std::iota(sort_begin, sort_end, 0);
+
+    ARROW_ASSIGN_OR_RAISE(auto array_sorter, GetArraySorter(*physical_type_));
+
+    NullPartitionResult sorted =
+        array_sorter(sort_begin, sort_end, arr, 0, array_options);
+    uint64_t rank;
+
+    ARROW_ASSIGN_OR_RAISE(auto rankings,
+                          MakeMutableUInt64Array(uint64(), length, ctx_->memory_pool()));
+    auto out_begin = rankings->GetMutableValues<uint64_t>(1);
+
+    switch (tiebreaker_) {
+      case RankOptions::Dense: {
+        T curr_value, prev_value;
+        rank = 0;
+
+        if (null_placement_ == NullPlacement::AtStart && sorted.null_count() > 0) {
+          rank++;
+          for (auto it = sorted.nulls_begin; it < sorted.nulls_end; it++) {
+            out_begin[*it] = rank;
+          }
+        }
+
+        for (auto it = sorted.non_nulls_begin; it < sorted.non_nulls_end; it++) {
+          curr_value = GetView::LogicalValue(arr.GetView(*it));
+          if (it == sorted.non_nulls_begin || curr_value != prev_value) {
+            rank++;
+          }
+
+          out_begin[*it] = rank;
+          prev_value = curr_value;
+        }
+
+        if (null_placement_ == NullPlacement::AtEnd) {
+          rank++;
+          for (auto it = sorted.nulls_begin; it < sorted.nulls_end; it++) {
+            out_begin[*it] = rank;
+          }
+        }
+        break;
+      }
+
+      case RankOptions::First: {
+        rank = 0;
+        for (auto it = sorted.overall_begin(); it < sorted.overall_end(); it++) {
+          out_begin[*it] = ++rank;
+        }
+        break;
+      }
+
+      case RankOptions::Min: {
+        T curr_value, prev_value;
+
+        if (null_placement_ == NullPlacement::AtStart) {
+          rank = 1;
+          for (auto it = sorted.nulls_begin; it < sorted.nulls_end; it++) {
+            out_begin[*it] = rank;
+          }
+        }
+
+        for (auto it = sorted.non_nulls_begin; it < sorted.non_nulls_end; it++) {
+          curr_value = GetView::LogicalValue(arr.GetView(*it));
+          if (it == sorted.non_nulls_begin || curr_value != prev_value) {
+            rank = (it - sorted.overall_begin()) + 1;
+          }
+          out_begin[*it] = rank;
+          prev_value = curr_value;
+        }
+
+        if (null_placement_ == NullPlacement::AtEnd) {
+          rank = sorted.non_null_count() + 1;
+          for (auto it = sorted.nulls_begin; it < sorted.nulls_end; it++) {
+            out_begin[*it] = rank;
+          }
+        }
+        break;
+      }
+
+      case RankOptions::Max: {
+        // The algorithm for Max is just like Min, but in reverse order.
+        T curr_value, prev_value;
+
+        if (null_placement_ == NullPlacement::AtEnd) {
+          rank = length;
+          for (auto it = sorted.nulls_begin; it < sorted.nulls_end; it++) {
+            out_begin[*it] = rank;
+          }
+        }
+
+        for (auto it = sorted.non_nulls_end - 1; it >= sorted.non_nulls_begin; it--) {
+          curr_value = GetView::LogicalValue(arr.GetView(*it));
+          if (it == sorted.non_nulls_end - 1 || curr_value != prev_value) {
+            rank = (it - sorted.overall_begin()) + 1;
+          }
+          out_begin[*it] = rank;
+          prev_value = curr_value;
+        }
+
+        if (null_placement_ == NullPlacement::AtStart) {
+          rank = sorted.null_count();
+          for (auto it = sorted.nulls_begin; it < sorted.nulls_end; it++) {
+            out_begin[*it] = rank;
+          }
+        }
+
+        break;
+      }
+    }
+
+    *output_ = Datum(rankings);
+    return Status::OK();
+  }
+
+  ExecContext* ctx_;
+  const Array& array_;
+  const RankOptions& options_;
+  const NullPlacement null_placement_;
+  const RankOptions::Tiebreaker tiebreaker_;
+  const std::shared_ptr<DataType> physical_type_;
+  Datum* output_;
+};
+
+const FunctionDoc rank_doc(
+    "Compute numerical ranks of an array (1-based)",
+    ("This function computes a rank of the input array.\n"
+     "By default, null values are considered greater than any other value and\n"
+     "are therefore sorted at the end of the input. For floating-point types,\n"
+     "NaNs are considered greater than any other non-null value, but smaller\n"
+     "than null values. The default tiebreaker is to assign ranks in order of\n"
+     "when ties appear in the input.\n"
+     "\n"
+     "The handling of nulls, NaNs and tiebreakers can be changed in RankOptions."),
+    {"input"}, "RankOptions");
+
+class RankMetaFunction : public MetaFunction {
+ public:
+  RankMetaFunction()
+      : MetaFunction("rank", Arity::Unary(), rank_doc, GetDefaultRankOptions()) {}
+
+  Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
+                            const FunctionOptions* options, ExecContext* ctx) const {
+    const RankOptions& rank_options = checked_cast<const RankOptions&>(*options);
+    switch (args[0].kind()) {
+      case Datum::ARRAY: {
+        return Rank(*args[0].make_array(), rank_options, ctx);
+      } break;
+      default:
+        break;
+    }
+    return Status::NotImplemented(
+        "Unsupported types for rank operation: "
+        "values=",
+        args[0].ToString());
+  }
+
+ private:
+  Result<Datum> Rank(const Array& array, const RankOptions& options,
+                     ExecContext* ctx) const {
+    Datum output;
+    ArrayRanker ranker(ctx, array, options, &output);
+    ARROW_RETURN_NOT_OK(ranker.Run());
+    return output;
+  }
+};
+
 }  // namespace
 
 Status SortChunkedArray(ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end,
@@ -1918,6 +2133,7 @@ Status SortChunkedArray(ExecContext* ctx, uint64_t* indices_begin, uint64_t* ind
 void RegisterVectorSort(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunction(std::make_shared<SortIndicesMetaFunction>()));
   DCHECK_OK(registry->AddFunction(std::make_shared<SelectKUnstableMetaFunction>()));
+  DCHECK_OK(registry->AddFunction(std::make_shared<RankMetaFunction>()));
 }
 
 #undef VISIT_SORTABLE_PHYSICAL_TYPES
diff --git a/cpp/src/arrow/compute/kernels/vector_sort_test.cc b/cpp/src/arrow/compute/kernels/vector_sort_test.cc
index 50add1aec4..c32b1a1e59 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort_test.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort_test.cc
@@ -53,6 +53,10 @@ std::vector<NullPlacement> AllNullPlacements() {
   return {NullPlacement::AtEnd, NullPlacement::AtStart};
 }
 
+std::vector<RankOptions::Tiebreaker> AllTiebreakers() {
+  return {RankOptions::Min, RankOptions::Max, RankOptions::First, RankOptions::Dense};
+}
+
 std::ostream& operator<<(std::ostream& os, NullPlacement null_placement) {
   os << (null_placement == NullPlacement::AtEnd ? "AtEnd" : "AtStart");
   return os;
@@ -1955,5 +1959,228 @@ INSTANTIATE_TEST_SUITE_P(AllNull, TestTableSortIndicesRandom,
                          testing::Combine(first_sort_keys, num_sort_keys,
                                           testing::Values(1.0)));
 
+// ----------------------------------------------------------------------
+// Tests for Rank
+
+void AssertRank(const std::shared_ptr<Array>& input, SortOrder order,
+                NullPlacement null_placement, RankOptions::Tiebreaker tiebreaker,
+                const std::shared_ptr<Array>& expected) {
+  const std::vector<SortKey> sort_keys{SortKey("foo", order)};
+  RankOptions options(sort_keys, null_placement, tiebreaker);
+  ARROW_SCOPED_TRACE("options = ", options.ToString());
+  ASSERT_OK_AND_ASSIGN(auto actual, CallFunction("rank", {input}, &options));
+  ValidateOutput(actual);
+  AssertDatumsEqual(expected, actual, /*verbose=*/true);
+}
+
+void AssertRankEmpty(std::shared_ptr<DataType> type, SortOrder order,
+                     NullPlacement null_placement, RankOptions::Tiebreaker tiebreaker) {
+  AssertRank(ArrayFromJSON(type, "[]"), order, null_placement, tiebreaker,
+             ArrayFromJSON(uint64(), "[]"));
+  AssertRank(ArrayFromJSON(type, "[null]"), order, null_placement, tiebreaker,
+             ArrayFromJSON(uint64(), "[1]"));
+}
+
+void AssertRankSimple(const std::shared_ptr<Array>& input, NullPlacement null_placement,
+                      RankOptions::Tiebreaker tiebreaker) {
+  auto expected_asc = ArrayFromJSON(uint64(), "[3, 4, 2, 1, 5]");
+  AssertRank(input, SortOrder::Ascending, null_placement, tiebreaker, expected_asc);
+
+  auto expected_desc = ArrayFromJSON(uint64(), "[3, 2, 4, 5, 1]");
+  AssertRank(input, SortOrder::Descending, null_placement, tiebreaker, expected_desc);
+}
+
+void AssertRankAllTiebreakers(const std::shared_ptr<Array>& input) {
+  AssertRank(input, SortOrder::Ascending, NullPlacement::AtEnd, RankOptions::Min,
+             ArrayFromJSON(uint64(), "[3, 1, 4, 6, 4, 6, 1]"));
+  AssertRank(input, SortOrder::Ascending, NullPlacement::AtEnd, RankOptions::Max,
+             ArrayFromJSON(uint64(), "[3, 2, 5, 7, 5, 7, 2]"));
+  AssertRank(input, SortOrder::Ascending, NullPlacement::AtEnd, RankOptions::First,
+             ArrayFromJSON(uint64(), "[3, 1, 4, 6, 5, 7, 2]"));
+  AssertRank(input, SortOrder::Ascending, NullPlacement::AtEnd, RankOptions::Dense,
+             ArrayFromJSON(uint64(), "[2, 1, 3, 4, 3, 4, 1]"));
+
+  AssertRank(input, SortOrder::Ascending, NullPlacement::AtStart, RankOptions::Min,
+             ArrayFromJSON(uint64(), "[5, 3, 6, 1, 6, 1, 3]"));
+  AssertRank(input, SortOrder::Ascending, NullPlacement::AtStart, RankOptions::Max,
+             ArrayFromJSON(uint64(), "[5, 4, 7, 2, 7, 2, 4]"));
+  AssertRank(input, SortOrder::Ascending, NullPlacement::AtStart, RankOptions::First,
+             ArrayFromJSON(uint64(), "[5, 3, 6, 1, 7, 2, 4]"));
+  AssertRank(input, SortOrder::Ascending, NullPlacement::AtStart, RankOptions::Dense,
+             ArrayFromJSON(uint64(), "[3, 2, 4, 1, 4, 1, 2]"));
+
+  AssertRank(input, SortOrder::Descending, NullPlacement::AtEnd, RankOptions::Min,
+             ArrayFromJSON(uint64(), "[3, 4, 1, 6, 1, 6, 4]"));
+  AssertRank(input, SortOrder::Descending, NullPlacement::AtEnd, RankOptions::Max,
+             ArrayFromJSON(uint64(), "[3, 5, 2, 7, 2, 7, 5]"));
+  AssertRank(input, SortOrder::Descending, NullPlacement::AtEnd, RankOptions::First,
+             ArrayFromJSON(uint64(), "[3, 4, 1, 6, 2, 7, 5]"));
+  AssertRank(input, SortOrder::Descending, NullPlacement::AtEnd, RankOptions::Dense,
+             ArrayFromJSON(uint64(), "[2, 3, 1, 4, 1, 4, 3]"));
+
+  AssertRank(input, SortOrder::Descending, NullPlacement::AtStart, RankOptions::Min,
+             ArrayFromJSON(uint64(), "[5, 6, 3, 1, 3, 1, 6]"));
+  AssertRank(input, SortOrder::Descending, NullPlacement::AtStart, RankOptions::Max,
+             ArrayFromJSON(uint64(), "[5, 7, 4, 2, 4, 2, 7]"));
+  AssertRank(input, SortOrder::Descending, NullPlacement::AtStart, RankOptions::First,
+             ArrayFromJSON(uint64(), "[5, 6, 3, 1, 4, 2, 7]"));
+  AssertRank(input, SortOrder::Descending, NullPlacement::AtStart, RankOptions::Dense,
+             ArrayFromJSON(uint64(), "[3, 4, 2, 1, 2, 1, 4]"));
+}
+
+TEST(TestRankForReal, RankReal) {
+  for (auto real_type : ::arrow::FloatingPointTypes()) {
+    for (auto null_placement : AllNullPlacements()) {
+      for (auto tiebreaker : AllTiebreakers()) {
+        for (auto order : AllOrders()) {
+          AssertRankEmpty(real_type, order, null_placement, tiebreaker);
+        }
+
+        AssertRankSimple(ArrayFromJSON(real_type, "[2.1, 3.2, 1.0, 0.0, 5.5]"),
+                         null_placement, tiebreaker);
+      }
+    }
+
+    AssertRankAllTiebreakers(
+        ArrayFromJSON(real_type, "[1.2, 0.0, 5.3, null, 5.3, null, 0.0]"));
+  }
+}
+
+TEST(TestRankForIntegral, RankIntegral) {
+  for (auto integer_type : ::arrow::IntTypes()) {
+    for (auto null_placement : AllNullPlacements()) {
+      for (auto tiebreaker : AllTiebreakers()) {
+        for (auto order : AllOrders()) {
+          AssertRankEmpty(integer_type, order, null_placement, tiebreaker);
+        }
+
+        AssertRankSimple(ArrayFromJSON(integer_type, "[2, 3, 1, 0, 5]"), null_placement,
+                         tiebreaker);
+      }
+    }
+    AssertRankAllTiebreakers(ArrayFromJSON(integer_type, "[1, 0, 5, null, 5, null, 0]"));
+  }
+}
+
+TEST(TestRankForBool, RankBool) {
+  for (auto null_placement : AllNullPlacements()) {
+    for (auto tiebreaker : AllTiebreakers()) {
+      for (auto order : AllOrders()) {
+        AssertRankEmpty(boolean(), order, null_placement, tiebreaker);
+      }
+
+      auto simple_bool = ArrayFromJSON(boolean(), "[false, true]");
+      AssertRank(simple_bool, SortOrder::Ascending, null_placement, tiebreaker,
+                 ArrayFromJSON(uint64(), "[1, 2]"));
+      AssertRank(simple_bool, SortOrder::Descending, null_placement, tiebreaker,
+                 ArrayFromJSON(uint64(), "[2, 1]"));
+
+      auto array =
+          ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]");
+      AssertRank(ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]"),
+                 SortOrder::Ascending, NullPlacement::AtEnd, RankOptions::Min,
+                 ArrayFromJSON(uint64(), "[3, 1, 3, 6, 3, 6, 1]"));
+      AssertRank(ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]"),
+                 SortOrder::Ascending, NullPlacement::AtEnd, RankOptions::Max,
+                 ArrayFromJSON(uint64(), "[5, 2, 5, 7, 5, 7, 2]"));
+      AssertRank(ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]"),
+                 SortOrder::Ascending, NullPlacement::AtEnd, RankOptions::First,
+                 ArrayFromJSON(uint64(), "[3, 1, 4, 6, 5, 7, 2]"));
+      AssertRank(ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]"),
+                 SortOrder::Ascending, NullPlacement::AtEnd, RankOptions::Dense,
+                 ArrayFromJSON(uint64(), "[2, 1, 2, 3, 2, 3, 1]"));
+
+      AssertRank(ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]"),
+                 SortOrder::Ascending, NullPlacement::AtStart, RankOptions::Min,
+                 ArrayFromJSON(uint64(), "[5, 3, 5, 1, 5, 1, 3]"));
+      AssertRank(ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]"),
+                 SortOrder::Ascending, NullPlacement::AtStart, RankOptions::Max,
+                 ArrayFromJSON(uint64(), "[7, 4, 7, 2, 7, 2, 4]"));
+      AssertRank(ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]"),
+                 SortOrder::Ascending, NullPlacement::AtStart, RankOptions::First,
+                 ArrayFromJSON(uint64(), "[5, 3, 6, 1, 7, 2, 4]"));
+      AssertRank(ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]"),
+                 SortOrder::Ascending, NullPlacement::AtStart, RankOptions::Dense,
+                 ArrayFromJSON(uint64(), "[3, 2, 3, 1, 3, 1, 2]"));
+
+      AssertRank(ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]"),
+                 SortOrder::Descending, NullPlacement::AtEnd, RankOptions::Min,
+                 ArrayFromJSON(uint64(), "[1, 4, 1, 6, 1, 6, 4]"));
+      AssertRank(ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]"),
+                 SortOrder::Descending, NullPlacement::AtEnd, RankOptions::Max,
+                 ArrayFromJSON(uint64(), "[3, 5, 3, 7, 3, 7, 5]"));
+      AssertRank(ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]"),
+                 SortOrder::Descending, NullPlacement::AtEnd, RankOptions::First,
+                 ArrayFromJSON(uint64(), "[1, 4, 2, 6, 3, 7, 5]"));
+      AssertRank(ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]"),
+                 SortOrder::Descending, NullPlacement::AtEnd, RankOptions::Dense,
+                 ArrayFromJSON(uint64(), "[1, 2, 1, 3, 1, 3, 2]"));
+
+      AssertRank(ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]"),
+                 SortOrder::Descending, NullPlacement::AtStart, RankOptions::Min,
+                 ArrayFromJSON(uint64(), "[3, 6, 3, 1, 3, 1, 6]"));
+      AssertRank(ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]"),
+                 SortOrder::Descending, NullPlacement::AtStart, RankOptions::Max,
+                 ArrayFromJSON(uint64(), "[5, 7, 5, 2, 5, 2, 7]"));
+      AssertRank(ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]"),
+                 SortOrder::Descending, NullPlacement::AtStart, RankOptions::First,
+                 ArrayFromJSON(uint64(), "[3, 6, 4, 1, 5, 2, 7]"));
+      AssertRank(ArrayFromJSON(boolean(), "[true, false, true, null, true, null, false]"),
+                 SortOrder::Descending, NullPlacement::AtStart, RankOptions::Dense,
+                 ArrayFromJSON(uint64(), "[2, 3, 2, 1, 2, 1, 3]"));
+    }
+  }
+}
+
+TEST(TestRankForTemporal, RankTemporal) {
+  for (auto temporal_type : ::arrow::TemporalTypes()) {
+    for (auto null_placement : AllNullPlacements()) {
+      for (auto tiebreaker : AllTiebreakers()) {
+        for (auto order : AllOrders()) {
+          AssertRankEmpty(temporal_type, order, null_placement, tiebreaker);
+        }
+
+        AssertRankSimple(ArrayFromJSON(temporal_type, "[2, 3, 1, 0, 5]"), null_placement,
+                         tiebreaker);
+      }
+    }
+    AssertRankAllTiebreakers(ArrayFromJSON(temporal_type, "[1, 0, 5, null, 5, null, 0]"));
+  }
+}
+
+TEST(TestRankForStrings, RankStrings) {
+  for (auto string_type : ::arrow::StringTypes()) {
+    for (auto null_placement : AllNullPlacements()) {
+      for (auto tiebreaker : AllTiebreakers()) {
+        for (auto order : AllOrders()) {
+          AssertRankEmpty(string_type, order, null_placement, tiebreaker);
+        }
+
+        AssertRankSimple(ArrayFromJSON(string_type, R"(["b", "c", "a", "", "d"])"),
+                         null_placement, tiebreaker);
+      }
+    }
+    AssertRankAllTiebreakers(
+        ArrayFromJSON(string_type, R"(["a", "", "e", null, "e", null, ""])"));
+  }
+}
+
+TEST(TestRankForFixedSizeBinary, RankFixedSizeBinary) {
+  auto binary_type = fixed_size_binary(3);
+  for (auto null_placement : AllNullPlacements()) {
+    for (auto tiebreaker : AllTiebreakers()) {
+      for (auto order : AllOrders()) {
+        AssertRankEmpty(binary_type, order, null_placement, tiebreaker);
+      }
+
+      AssertRankSimple(
+          ArrayFromJSON(binary_type, R"(["bbb", "ccc", "aaa", "   ", "ddd"])"),
+          null_placement, tiebreaker);
+    }
+  }
+  AssertRankAllTiebreakers(
+      ArrayFromJSON(binary_type, R"(["aaa", "   ", "eee", null, "eee", null, "   "])"));
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index c373fa8988..80aea63dfa 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -1665,10 +1665,13 @@ in the respective option classes.
 +-----------------------+------------+---------------------------------------------------------+-------------------+--------------------------------+----------------+
 | partition_nth_indices | Unary      | Boolean, Numeric, Temporal, Binary- and String-like     | UInt64            | :struct:`PartitionNthOptions`  | \(3)           |
 +-----------------------+------------+---------------------------------------------------------+-------------------+--------------------------------+----------------+
-| select_k_unstable     | Unary      | Boolean, Numeric, Temporal, Binary- and String-like     | UInt64            | :struct:`SelectKOptions`       | \(4) \(5)      |
+| rank                  | Unary      | Boolean, Numeric, Temporal, Binary- and String-like     | UInt64            | :struct:`RankOptions`          | \(4)           |
 +-----------------------+------------+---------------------------------------------------------+-------------------+--------------------------------+----------------+
-| sort_indices          | Unary      | Boolean, Numeric, Temporal, Binary- and String-like     | UInt64            | :struct:`SortOptions`          | \(1) \(4)      |
+| select_k_unstable     | Unary      | Boolean, Numeric, Temporal, Binary- and String-like     | UInt64            | :struct:`SelectKOptions`       | \(5) \(6)      |
 +-----------------------+------------+---------------------------------------------------------+-------------------+--------------------------------+----------------+
+| sort_indices          | Unary      | Boolean, Numeric, Temporal, Binary- and String-like     | UInt64            | :struct:`SortOptions`          | \(1) \(5)      |
++-----------------------+------------+---------------------------------------------------------+-------------------+--------------------------------+----------------+
+
 
 * \(1) The output is an array of indices into the input, that define a
   stable sort of the input.
@@ -1682,11 +1685,13 @@ in the respective option classes.
   :func:`std::nth_element`).  *N* is given in
   :member:`PartitionNthOptions::pivot`.
 
-* \(4) The input can be an array, chunked array, record batch or
+* \(4) The output is a one-based numerical array of ranks
+
+* \(5) The input can be an array, chunked array, record batch or
   table. If the input is a record batch or table, one or more sort
   keys must be specified.
 
-* \(5) The output is an array of indices into the input, that define a
+* \(6) The output is an array of indices into the input, that define a
   non-stable sort of the input.
 
 .. _cpp-compute-vector-structural-transforms: