You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2022/04/27 13:04:34 UTC

[arrow] branch master updated: ARROW-16173: [C++] Add benchmarks for temporal functions/kernels

This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 06ca00c2da ARROW-16173: [C++] Add benchmarks for temporal functions/kernels
06ca00c2da is described below

commit 06ca00c2daeeb0d6461e7b6bec51679c19b5b92b
Author: Rok <ro...@mihevc.org>
AuthorDate: Wed Apr 27 09:04:17 2022 -0400

    ARROW-16173: [C++] Add benchmarks for temporal functions/kernels
    
    This is to resolve [ARROW-16173](https://issues.apache.org/jira/browse/ARROW-16173).
    
    We want to benchmark temporal kernels to avoid performance regressions in future refactorings.
    
    Closes #12997 from rok/ARROW-16173
    
    Authored-by: Rok <ro...@mihevc.org>
    Signed-off-by: David Li <li...@gmail.com>
---
 cpp/src/arrow/compute/kernels/CMakeLists.txt       |   1 +
 .../compute/kernels/scalar_temporal_benchmark.cc   | 218 +++++++++++++++++++++
 2 files changed, 219 insertions(+)

diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index 93a02cdb1f..0a7f619112 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -41,6 +41,7 @@ add_arrow_benchmark(scalar_if_else_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_random_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_set_lookup_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_string_benchmark PREFIX "arrow-compute")
+add_arrow_benchmark(scalar_temporal_benchmark PREFIX "arrow-compute")
 
 # ----------------------------------------------------------------------
 # Vector kernels
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_benchmark.cc
new file mode 100644
index 0000000000..ddaa502a34
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal_benchmark.cc
@@ -0,0 +1,218 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <functional>
+
+#include "benchmark/benchmark.h"
+
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
+#include "arrow/util/benchmark_util.h"
+
+namespace arrow {
+
+namespace compute {
+
+constexpr auto kSeed = 0x94378165;
+
+std::vector<int64_t> g_data_sizes = {kL2Size};
+static constexpr int64_t kInt64Min = -2000000000;  // 1906-08-16 20:26:40
+static constexpr int64_t kInt64Max = 2000000000;   // 2033-05-18 03:33:20
+
+void SetArgs(benchmark::internal::Benchmark* bench) {
+  for (const auto inverse_null_proportion : std::vector<ArgsType>({100, 0})) {
+    bench->Args({static_cast<ArgsType>(kL2Size), inverse_null_proportion});
+  }
+}
+
+using UnaryRoundingOp = Result<Datum>(const Datum&, const RoundTemporalOptions,
+                                      ExecContext*);
+using UnaryOp = Result<Datum>(const Datum&, ExecContext*);
+
+template <UnaryRoundingOp& Op, std::shared_ptr<DataType>& timestamp_type,
+          RoundTemporalOptions& options>
+static void BenchmarkTemporalRounding(benchmark::State& state) {
+  RegressionArgs args(state);
+  ExecContext* ctx = default_exec_context();
+
+  const int64_t array_size = args.size / sizeof(int64_t);
+
+  auto rand = random::RandomArrayGenerator(kSeed);
+  auto array =
+      rand.Numeric<Int64Type>(array_size, kInt64Min, kInt64Max, args.null_proportion);
+  EXPECT_OK_AND_ASSIGN(auto timestamp_array, array->View(timestamp_type));
+
+  for (auto _ : state) {
+    ABORT_NOT_OK(Op(timestamp_array, options, ctx).status());
+  }
+
+  state.SetItemsProcessed(state.iterations() * array_size);
+}
+
+template <UnaryOp& Op, std::shared_ptr<DataType>& timestamp_type>
+static void BenchmarkTemporal(benchmark::State& state) {
+  RegressionArgs args(state);
+  ExecContext* ctx = default_exec_context();
+
+  const int64_t array_size = args.size / sizeof(int64_t);
+
+  auto rand = random::RandomArrayGenerator(kSeed);
+  auto array =
+      rand.Numeric<Int64Type>(array_size, kInt64Min, kInt64Max, args.null_proportion);
+  EXPECT_OK_AND_ASSIGN(auto timestamp_array, array->View(timestamp_type));
+
+  for (auto _ : state) {
+    ABORT_NOT_OK(Op(timestamp_array, ctx).status());
+  }
+
+  state.SetItemsProcessed(state.iterations() * array_size);
+}
+
+template <std::shared_ptr<DataType>& timestamp_type>
+static void BenchmarkStrftime(benchmark::State& state) {
+  RegressionArgs args(state);
+  ExecContext* ctx = default_exec_context();
+
+  const int64_t array_size = args.size / sizeof(int64_t);
+
+  auto rand = random::RandomArrayGenerator(kSeed);
+  auto array =
+      rand.Numeric<Int64Type>(array_size, kInt64Min, kInt64Max, args.null_proportion);
+  EXPECT_OK_AND_ASSIGN(auto timestamp_array, array->View(timestamp_type));
+
+  auto options = StrftimeOptions();
+  for (auto _ : state) {
+    ABORT_NOT_OK(Strftime(timestamp_array, options, ctx).status());
+  }
+
+  state.SetItemsProcessed(state.iterations() * array_size);
+}
+
+template <std::shared_ptr<DataType>& timestamp_type>
+static void BenchmarkStrptime(benchmark::State& state) {
+  RegressionArgs args(state);
+  ExecContext* ctx = default_exec_context();
+
+  const int64_t array_size = args.size / sizeof(int64_t);
+
+  auto rand = random::RandomArrayGenerator(kSeed);
+  auto array =
+      rand.Numeric<Int64Type>(array_size, kInt64Min, kInt64Max, args.null_proportion);
+  EXPECT_OK_AND_ASSIGN(auto timestamp_array, array->View(timestamp_type));
+  auto strftime_options = StrftimeOptions("%Y-%m-%dT%H:%M:%S");
+  EXPECT_OK_AND_ASSIGN(auto string_array,
+                       Strftime(timestamp_array, strftime_options, ctx));
+  auto strptime_options = StrptimeOptions("%Y-%m-%dT%H:%M:%S", TimeUnit::MICRO, true);
+
+  for (auto _ : state) {
+    ABORT_NOT_OK(Strptime(string_array, strptime_options, ctx).status());
+  }
+
+  state.SetItemsProcessed(state.iterations() * array_size);
+}
+
+static void BenchmarkAssumeTimezone(benchmark::State& state) {
+  RegressionArgs args(state);
+  ExecContext* ctx = default_exec_context();
+
+  const int64_t array_size = args.size / sizeof(int64_t);
+
+  auto rand = random::RandomArrayGenerator(kSeed);
+  auto array =
+      rand.Numeric<Int64Type>(array_size, kInt64Min, kInt64Max, args.null_proportion);
+  EXPECT_OK_AND_ASSIGN(auto timestamp_array, array->View(timestamp(TimeUnit::NANO)));
+
+  auto options = AssumeTimezoneOptions(
+      "Pacific/Marquesas", AssumeTimezoneOptions::Ambiguous::AMBIGUOUS_LATEST,
+      AssumeTimezoneOptions::Nonexistent::NONEXISTENT_EARLIEST);
+  for (auto _ : state) {
+    ABORT_NOT_OK(AssumeTimezone(timestamp_array, options, ctx).status());
+  }
+
+  state.SetItemsProcessed(state.iterations() * array_size);
+}
+
+auto zoned = timestamp(TimeUnit::NANO, "Pacific/Marquesas");
+auto non_zoned = timestamp(TimeUnit::NANO);
+
+#define DECLARE_TEMPORAL_ROUNDING_BENCHMARKS(OPTIONS)                              \
+  BENCHMARK_TEMPLATE(BenchmarkTemporalRounding, CeilTemporal, zoned, OPTIONS)      \
+      ->Apply(SetArgs);                                                            \
+  BENCHMARK_TEMPLATE(BenchmarkTemporalRounding, FloorTemporal, zoned, OPTIONS)     \
+      ->Apply(SetArgs);                                                            \
+  BENCHMARK_TEMPLATE(BenchmarkTemporalRounding, RoundTemporal, zoned, OPTIONS)     \
+      ->Apply(SetArgs);                                                            \
+  BENCHMARK_TEMPLATE(BenchmarkTemporalRounding, CeilTemporal, non_zoned, OPTIONS)  \
+      ->Apply(SetArgs);                                                            \
+  BENCHMARK_TEMPLATE(BenchmarkTemporalRounding, FloorTemporal, non_zoned, OPTIONS) \
+      ->Apply(SetArgs);                                                            \
+  BENCHMARK_TEMPLATE(BenchmarkTemporalRounding, RoundTemporal, non_zoned, OPTIONS) \
+      ->Apply(SetArgs);
+
+#define DECLARE_TEMPORAL_BENCHMARKS(OP)                                 \
+  BENCHMARK_TEMPLATE(BenchmarkTemporal, OP, non_zoned)->Apply(SetArgs); \
+  BENCHMARK_TEMPLATE(BenchmarkTemporal, OP, zoned)->Apply(SetArgs);
+
+#define DECLARE_TEMPORAL_BENCHMARKS_ZONED(OP) \
+  BENCHMARK_TEMPLATE(BenchmarkTemporal, OP, zoned)->Apply(SetArgs);
+
+// Temporal rounding benchmarks
+auto round_1_minute = RoundTemporalOptions(1, CalendarUnit::MINUTE);
+auto round_10_minute = RoundTemporalOptions(10, CalendarUnit::MINUTE);
+auto round_1_week = RoundTemporalOptions(1, CalendarUnit::WEEK);
+auto round_10_week = RoundTemporalOptions(10, CalendarUnit::WEEK);
+auto round_1_month = RoundTemporalOptions(1, CalendarUnit::MONTH);
+auto round_10_month = RoundTemporalOptions(10, CalendarUnit::MONTH);
+
+DECLARE_TEMPORAL_ROUNDING_BENCHMARKS(round_1_minute);
+DECLARE_TEMPORAL_ROUNDING_BENCHMARKS(round_1_week);
+DECLARE_TEMPORAL_ROUNDING_BENCHMARKS(round_1_month);
+DECLARE_TEMPORAL_ROUNDING_BENCHMARKS(round_10_minute);
+DECLARE_TEMPORAL_ROUNDING_BENCHMARKS(round_10_week);
+DECLARE_TEMPORAL_ROUNDING_BENCHMARKS(round_10_month);
+
+// Temporal component extraction
+DECLARE_TEMPORAL_BENCHMARKS(Year);
+DECLARE_TEMPORAL_BENCHMARKS(IsLeapYear);
+DECLARE_TEMPORAL_BENCHMARKS(Month);
+DECLARE_TEMPORAL_BENCHMARKS(Day);
+DECLARE_TEMPORAL_BENCHMARKS(DayOfYear);
+DECLARE_TEMPORAL_BENCHMARKS_ZONED(IsDaylightSavings);
+DECLARE_TEMPORAL_BENCHMARKS(USYear);
+DECLARE_TEMPORAL_BENCHMARKS(ISOYear);
+DECLARE_TEMPORAL_BENCHMARKS(ISOWeek);
+DECLARE_TEMPORAL_BENCHMARKS(USWeek);
+DECLARE_TEMPORAL_BENCHMARKS(Quarter);
+DECLARE_TEMPORAL_BENCHMARKS(Hour);
+DECLARE_TEMPORAL_BENCHMARKS(Minute);
+DECLARE_TEMPORAL_BENCHMARKS(Second);
+DECLARE_TEMPORAL_BENCHMARKS(Millisecond);
+DECLARE_TEMPORAL_BENCHMARKS(Microsecond);
+DECLARE_TEMPORAL_BENCHMARKS(Nanosecond);
+DECLARE_TEMPORAL_BENCHMARKS(Subsecond);
+
+// Other temporal benchmarks
+BENCHMARK_TEMPLATE(BenchmarkStrftime, non_zoned)->Apply(SetArgs);
+BENCHMARK_TEMPLATE(BenchmarkStrftime, zoned)->Apply(SetArgs);
+BENCHMARK_TEMPLATE(BenchmarkStrptime, non_zoned)->Apply(SetArgs);
+BENCHMARK_TEMPLATE(BenchmarkStrptime, zoned)->Apply(SetArgs);
+BENCHMARK(BenchmarkAssumeTimezone)->Apply(SetArgs);
+
+}  // namespace compute
+}  // namespace arrow