You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by yi...@apache.org on 2022/12/14 01:57:30 UTC

[arrow] branch master updated: GH-14937: [C++] Add rank kernel benchmarks (#14938)

This is an automated email from the ASF dual-hosted git repository.

yibocai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new c1aef6ffa5 GH-14937: [C++] Add rank kernel benchmarks (#14938)
c1aef6ffa5 is described below

commit c1aef6ffa5efbb4984888ce3471082383a098829
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Wed Dec 14 02:57:21 2022 +0100

    GH-14937: [C++] Add rank kernel benchmarks (#14938)
    
    
    * Closes: #14937
    
    Authored-by: Antoine Pitrou <an...@python.org>
    Signed-off-by: Yibo Cai <yi...@arm.com>
---
 .../arrow/compute/kernels/vector_sort_benchmark.cc | 169 ++++++++++++++-------
 cpp/src/arrow/util/benchmark_util.h                |  33 ++--
 2 files changed, 134 insertions(+), 68 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/vector_sort_benchmark.cc b/cpp/src/arrow/compute/kernels/vector_sort_benchmark.cc
index 6ab0bcfde9..2dec0e60d6 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort_benchmark.cc
@@ -29,35 +29,59 @@ namespace arrow {
 namespace compute {
 constexpr auto kSeed = 0x0ff1ce;
 
-static void ArraySortIndicesBenchmark(benchmark::State& state,
-                                      const std::shared_ptr<Array>& values) {
-  for (auto _ : state) {
-    ABORT_NOT_OK(SortIndices(*values).status());
+//
+// Array sort/rank benchmark helpers
+//
+
+struct SortRunner {
+  explicit SortRunner(benchmark::State&) {}
+
+  Status operator()(const std::shared_ptr<Array>& values) const {
+    return SortIndices(*values).status();
   }
-  state.SetItemsProcessed(state.iterations() * values->length());
-}
+  Status operator()(const std::shared_ptr<ChunkedArray>& values) const {
+    return SortIndices(*values).status();
+  }
+};
+
+struct RankRunner {
+  explicit RankRunner(benchmark::State& state) {
+    options = RankOptions::Defaults();
+    options.tiebreaker = static_cast<RankOptions::Tiebreaker>(state.range(2));
+  }
+
+  RankOptions options;
+
+  Status operator()(const std::shared_ptr<Array>& values) const {
+    return CallFunction("rank", {values}, &options).status();
+  }
+};
 
-static void ChunkedArraySortIndicesBenchmark(
-    benchmark::State& state, const std::shared_ptr<ChunkedArray>& values) {
+template <typename Runner, typename ArrayLike>
+static void ArraySortFuncBenchmark(benchmark::State& state, const Runner& runner,
+                                   const std::shared_ptr<ArrayLike>& values) {
   for (auto _ : state) {
-    ABORT_NOT_OK(SortIndices(*values).status());
+    ABORT_NOT_OK(runner(values));
   }
   state.SetItemsProcessed(state.iterations() * values->length());
 }
 
-static void ArraySortIndicesInt64Benchmark(benchmark::State& state, int64_t min,
-                                           int64_t max) {
+template <typename Runner>
+static void ArraySortFuncInt64Benchmark(benchmark::State& state, const Runner& runner,
+                                        int64_t min, int64_t max) {
   RegressionArgs args(state);
 
   const int64_t array_size = args.size / sizeof(int64_t);
   auto rand = random::RandomArrayGenerator(kSeed);
   auto values = rand.Int64(array_size, min, max, args.null_proportion);
 
-  ArraySortIndicesBenchmark(state, values);
+  ArraySortFuncBenchmark(state, runner, values);
 }
 
-static void ChunkedArraySortIndicesInt64Benchmark(benchmark::State& state, int64_t min,
-                                                  int64_t max) {
+template <typename Runner>
+static void ChunkedArraySortFuncInt64Benchmark(benchmark::State& state,
+                                               const Runner& runner, int64_t min,
+                                               int64_t max) {
   RegressionArgs args(state);
 
   const int64_t n_chunks = 10;
@@ -68,39 +92,58 @@ static void ChunkedArraySortIndicesInt64Benchmark(benchmark::State& state, int64
     chunks.push_back(rand.Int64(array_size, min, max, args.null_proportion));
   }
 
-  ChunkedArraySortIndicesBenchmark(state, std::make_shared<ChunkedArray>(chunks));
+  ArraySortFuncBenchmark(state, runner, std::make_shared<ChunkedArray>(chunks));
+}
+
+template <typename Runner>
+static void ArraySortFuncBoolBenchmark(benchmark::State& state, const Runner& runner) {
+  RegressionArgs args(state);
+
+  const int64_t array_size = args.size * 8;
+  auto rand = random::RandomArrayGenerator(kSeed);
+  auto values = rand.Boolean(array_size, 0.5, args.null_proportion);
+
+  ArraySortFuncBenchmark(state, runner, values);
 }
 
 static void ArraySortIndicesInt64Narrow(benchmark::State& state) {
-  ArraySortIndicesInt64Benchmark(state, -100, 100);
+  ArraySortFuncInt64Benchmark(state, SortRunner(state), -100, 100);
+}
+
+static void ArrayRankInt64Narrow(benchmark::State& state) {
+  ArraySortFuncInt64Benchmark(state, RankRunner(state), -100, 100);
 }
 
 static void ArraySortIndicesInt64Wide(benchmark::State& state) {
   const auto min = std::numeric_limits<int64_t>::min();
   const auto max = std::numeric_limits<int64_t>::max();
-  ArraySortIndicesInt64Benchmark(state, min, max);
+  ArraySortFuncInt64Benchmark(state, SortRunner(state), min, max);
 }
 
-static void ArraySortIndicesBool(benchmark::State& state) {
-  RegressionArgs args(state);
-
-  const int64_t array_size = args.size * 8;
-  auto rand = random::RandomArrayGenerator(kSeed);
-  auto values = rand.Boolean(array_size, 0.5, args.null_proportion);
+static void ArrayRankInt64Wide(benchmark::State& state) {
+  const auto min = std::numeric_limits<int64_t>::min();
+  const auto max = std::numeric_limits<int64_t>::max();
+  ArraySortFuncInt64Benchmark(state, RankRunner(state), min, max);
+}
 
-  ArraySortIndicesBenchmark(state, values);
+static void ArraySortIndicesBool(benchmark::State& state) {
+  ArraySortFuncBoolBenchmark(state, SortRunner(state));
 }
 
 static void ChunkedArraySortIndicesInt64Narrow(benchmark::State& state) {
-  ChunkedArraySortIndicesInt64Benchmark(state, -100, 100);
+  ChunkedArraySortFuncInt64Benchmark(state, SortRunner(state), -100, 100);
 }
 
 static void ChunkedArraySortIndicesInt64Wide(benchmark::State& state) {
   const auto min = std::numeric_limits<int64_t>::min();
   const auto max = std::numeric_limits<int64_t>::max();
-  ChunkedArraySortIndicesInt64Benchmark(state, min, max);
+  ChunkedArraySortFuncInt64Benchmark(state, SortRunner(state), min, max);
 }
 
+//
+// Record batch and table sort benchmark helpers
+//
+
 static void DatumSortIndicesBenchmark(benchmark::State& state, const Datum& datum,
                                       const SortOptions& options) {
   for (auto _ : state) {
@@ -237,35 +280,24 @@ static void TableSortIndicesInt64Wide(benchmark::State& state) {
                         std::numeric_limits<int64_t>::max());
 }
 
-BENCHMARK(ArraySortIndicesInt64Narrow)
-    ->Apply(RegressionSetArgs)
-    ->Args({1 << 20, 100})
-    ->Args({1 << 23, 100})
-    ->Unit(benchmark::TimeUnit::kNanosecond);
-
-BENCHMARK(ArraySortIndicesInt64Wide)
-    ->Apply(RegressionSetArgs)
-    ->Args({1 << 20, 100})
-    ->Args({1 << 23, 100})
-    ->Unit(benchmark::TimeUnit::kNanosecond);
+//
+// Sort benchmark declarations
+//
 
-BENCHMARK(ArraySortIndicesBool)
-    ->Apply(RegressionSetArgs)
-    ->Args({1 << 20, 100})
-    ->Args({1 << 23, 100})
-    ->Unit(benchmark::TimeUnit::kNanosecond);
+void ArraySortIndicesSetArgs(benchmark::internal::Benchmark* bench) {
+  // 2 benchmark arguments: size, inverse null proportion
+  bench->Unit(benchmark::kNanosecond);
+  bench->Apply(RegressionSetArgs);
+  bench->Args({1 << 20, 100});
+  bench->Args({1 << 23, 100});
+}
 
-BENCHMARK(ChunkedArraySortIndicesInt64Narrow)
-    ->Apply(RegressionSetArgs)
-    ->Args({1 << 20, 100})
-    ->Args({1 << 23, 100})
-    ->Unit(benchmark::TimeUnit::kNanosecond);
+BENCHMARK(ArraySortIndicesInt64Narrow)->Apply(ArraySortIndicesSetArgs);
+BENCHMARK(ArraySortIndicesInt64Wide)->Apply(ArraySortIndicesSetArgs);
+BENCHMARK(ArraySortIndicesBool)->Apply(ArraySortIndicesSetArgs);
 
-BENCHMARK(ChunkedArraySortIndicesInt64Wide)
-    ->Apply(RegressionSetArgs)
-    ->Args({1 << 20, 100})
-    ->Args({1 << 23, 100})
-    ->Unit(benchmark::TimeUnit::kNanosecond);
+BENCHMARK(ChunkedArraySortIndicesInt64Narrow)->Apply(ArraySortIndicesSetArgs);
+BENCHMARK(ChunkedArraySortIndicesInt64Wide)->Apply(ArraySortIndicesSetArgs);
 
 BENCHMARK(RecordBatchSortIndicesInt64Narrow)
     ->ArgsProduct({
@@ -301,5 +333,38 @@ BENCHMARK(TableSortIndicesInt64Wide)
     })
     ->Unit(benchmark::TimeUnit::kNanosecond);
 
+//
+// Rank benchmark declarations
+//
+
+void ArrayRankSetArgs(benchmark::internal::Benchmark* bench) {
+  // 3 benchmark arguments: size, inverse null proportion, rank tiebreaker
+  bench->Unit(benchmark::kNanosecond);
+  bench->ArgNames({"", "", "tiebreaker"});
+
+  // Use only a subset of kInverseNullProportions as the cartesian product of
+  // arguments is large already.
+  const std::vector<ArgsType> inverse_null_proportions{10, 1, 0};
+  // Don't bother with Max as it should have the same perf as Min
+  const std::vector<RankOptions::Tiebreaker> tie_breakers{
+      RankOptions::Min, RankOptions::First, RankOptions::Dense};
+
+  for (const auto inverse_null_proportion : kInverseNullProportions) {
+    for (const auto tie_breaker : tie_breakers) {
+      bench->Args({static_cast<ArgsType>(kL1Size), inverse_null_proportion,
+                   static_cast<ArgsType>(tie_breaker)});
+    }
+  }
+  for (const auto tie_breaker : tie_breakers) {
+    bench->Args({1 << 20, 100, static_cast<ArgsType>(tie_breaker)});
+  }
+  for (const auto tie_breaker : tie_breakers) {
+    bench->Args({1 << 23, 100, static_cast<ArgsType>(tie_breaker)});
+  }
+}
+
+BENCHMARK(ArrayRankInt64Narrow)->Apply(ArrayRankSetArgs);
+BENCHMARK(ArrayRankInt64Wide)->Apply(ArrayRankSetArgs);
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/benchmark_util.h b/cpp/src/arrow/util/benchmark_util.h
index 79484989ac..5a5f51df5a 100644
--- a/cpp/src/arrow/util/benchmark_util.h
+++ b/cpp/src/arrow/util/benchmark_util.h
@@ -25,16 +25,9 @@
 
 namespace arrow {
 
-using internal::CpuInfo;
-
-static const CpuInfo* cpu_info = CpuInfo::GetInstance();
-
-static const int64_t kL1Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L1);
-static const int64_t kL2Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L2);
-static const int64_t kL3Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L3);
-static const int64_t kCantFitInL3Size = kL3Size * 4;
-static const std::vector<int64_t> kMemorySizes = {kL1Size, kL2Size, kL3Size,
-                                                  kCantFitInL3Size};
+// Benchmark changed its parameter type between releases from
+// int to int64_t. As it doesn't have version macros, we need
+// to apply C++ template magic.
 
 template <typename Func>
 struct BenchmarkArgsType;
@@ -46,12 +39,22 @@ struct BenchmarkArgsType<benchmark::internal::Benchmark* (
   using type = Values;
 };
 
-// Benchmark changed its parameter type between releases from
-// int to int64_t. As it doesn't have version macros, we need
-// to apply C++ template magic.
 using ArgsType =
     typename BenchmarkArgsType<decltype(&benchmark::internal::Benchmark::Args)>::type;
 
+using internal::CpuInfo;
+
+static const CpuInfo* cpu_info = CpuInfo::GetInstance();
+
+static const int64_t kL1Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L1);
+static const int64_t kL2Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L2);
+static const int64_t kL3Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L3);
+static const int64_t kCantFitInL3Size = kL3Size * 4;
+static const std::vector<int64_t> kMemorySizes = {kL1Size, kL2Size, kL3Size,
+                                                  kCantFitInL3Size};
+// 0 is treated as "no nulls"
+static const std::vector<ArgsType> kInverseNullProportions = {10000, 100, 10, 2, 1, 0};
+
 struct GenericItemsArgs {
   // number of items processed per iteration
   const int64_t size;
@@ -82,10 +85,8 @@ void BenchmarkSetArgsWithSizes(benchmark::internal::Benchmark* bench,
                                const std::vector<int64_t>& sizes = kMemorySizes) {
   bench->Unit(benchmark::kMicrosecond);
 
-  // 0 is treated as "no nulls"
   for (const auto size : sizes) {
-    for (const auto inverse_null_proportion :
-         std::vector<ArgsType>({10000, 100, 10, 2, 1, 0})) {
+    for (const auto inverse_null_proportion : kInverseNullProportions) {
       bench->Args({static_cast<ArgsType>(size), inverse_null_proportion});
     }
   }