You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2022/06/09 15:56:19 UTC

[arrow] branch master updated: ARROW-16741: [C++] Add Benchmarks for Binary Temporal Operations (#13302)

This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 32054f75ab ARROW-16741: [C++] Add Benchmarks for Binary Temporal Operations (#13302)
32054f75ab is described below

commit 32054f75ab91c66cab82528b0549a2ae3b624ae8
Author: Ivan Chau <iv...@gmail.com>
AuthorDate: Thu Jun 9 11:56:14 2022 -0400

    ARROW-16741: [C++] Add Benchmarks for Binary Temporal Operations (#13302)
    
    Add all binary temporal benchmarks and documentation to `api_scalar.h`
    
    Authored-by: Ivan Chau <iv...@gmail.com>
    Signed-off-by: David Li <li...@gmail.com>
---
 cpp/src/arrow/compute/api_scalar.cc                |  14 ++
 cpp/src/arrow/compute/api_scalar.h                 | 157 +++++++++++++++++++++
 .../compute/kernels/scalar_temporal_benchmark.cc   |  48 +++++++
 3 files changed, 219 insertions(+)

diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index cd5b4ce799..e6b0ade315 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -840,6 +840,20 @@ Result<Datum> Week(const Datum& arg, WeekOptions options, ExecContext* ctx) {
   return CallFunction("week", {arg}, &options, ctx);
 }
 
+SCALAR_EAGER_BINARY(YearsBetween, "years_between")
+SCALAR_EAGER_BINARY(QuartersBetween, "quarters_between")
+SCALAR_EAGER_BINARY(MonthsBetween, "month_interval_between")
+SCALAR_EAGER_BINARY(WeeksBetween, "weeks_between")
+SCALAR_EAGER_BINARY(MonthDayNanoBetween, "month_day_nano_interval_between")
+SCALAR_EAGER_BINARY(DayTimeBetween, "day_time_interval_between")
+SCALAR_EAGER_BINARY(DaysBetween, "days_between")
+SCALAR_EAGER_BINARY(HoursBetween, "hours_between")
+SCALAR_EAGER_BINARY(MinutesBetween, "minutes_between")
+SCALAR_EAGER_BINARY(SecondsBetween, "seconds_between")
+SCALAR_EAGER_BINARY(MillisecondsBetween, "milliseconds_between")
+SCALAR_EAGER_BINARY(MicrosecondsBetween, "microseconds_between")
+SCALAR_EAGER_BINARY(NanosecondsBetween, "nanoseconds_between")
+
 // ----------------------------------------------------------------------
 // Structural transforms
 Result<Datum> MapLookup(const Datum& arg, MapLookupOptions options, ExecContext* ctx) {
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 9fb7a94210..7f7b23c937 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -1467,6 +1467,163 @@ ARROW_EXPORT Result<Datum> AssumeTimezone(const Datum& values,
 ARROW_EXPORT Result<Datum> IsDaylightSavings(const Datum& values,
                                              ExecContext* ctx = NULLPTR);
 
+/// \brief Years Between finds the number of years between two values
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> YearsBetween(const Datum& left, const Datum& right,
+                                        ExecContext* ctx = NULLPTR);
+
+/// \brief Quarters Between finds the number of quarters between two values
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> QuartersBetween(const Datum& left, const Datum& right,
+                                           ExecContext* ctx = NULLPTR);
+
+/// \brief Months Between finds the number of month between two values
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> MonthsBetween(const Datum& left, const Datum& right,
+                                         ExecContext* ctx = NULLPTR);
+
+/// \brief Weeks Between finds the number of weeks between two values
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> WeeksBetween(const Datum& left, const Datum& right,
+                                        ExecContext* ctx = NULLPTR);
+
+/// \brief Month Day Nano Between finds the number of months, days, and nonaseconds
+/// between two values
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> MonthDayNanoBetween(const Datum& left, const Datum& right,
+                                               ExecContext* ctx = NULLPTR);
+
+/// \brief DayTime Between finds the number of days and milliseconds between two values
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> DayTimeBetween(const Datum& left, const Datum& right,
+                                          ExecContext* ctx = NULLPTR);
+
+/// \brief Days Between finds the number of days between two values
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> DaysBetween(const Datum& left, const Datum& right,
+                                       ExecContext* ctx = NULLPTR);
+
+/// \brief Hours Between finds the number of hours between two values
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> HoursBetween(const Datum& left, const Datum& right,
+                                        ExecContext* ctx = NULLPTR);
+
+/// \brief Minutes Between finds the number of minutes between two values
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> MinutesBetween(const Datum& left, const Datum& right,
+                                          ExecContext* ctx = NULLPTR);
+
+/// \brief Seconds Between finds the number of hours between two values
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> SecondsBetween(const Datum& left, const Datum& right,
+                                          ExecContext* ctx = NULLPTR);
+
+/// \brief Milliseconds Between finds the number of milliseconds between two values
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> MillisecondsBetween(const Datum& left, const Datum& right,
+                                               ExecContext* ctx = NULLPTR);
+
+/// \brief Microseconds Between finds the number of microseconds between two values
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> MicrosecondsBetween(const Datum& left, const Datum& right,
+                                               ExecContext* ctx = NULLPTR);
+
+/// \brief Nanoseconds Between finds the number of nanoseconds between two values
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> NanosecondsBetween(const Datum& left, const Datum& right,
+                                              ExecContext* ctx = NULLPTR);
+
 /// \brief Finds either the FIRST, LAST, or ALL items with a key that matches the given
 /// query key in a map.
 ///
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_benchmark.cc
index ddaa502a34..29c2acfd0a 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal_benchmark.cc
@@ -44,6 +44,7 @@ void SetArgs(benchmark::internal::Benchmark* bench) {
 using UnaryRoundingOp = Result<Datum>(const Datum&, const RoundTemporalOptions,
                                       ExecContext*);
 using UnaryOp = Result<Datum>(const Datum&, ExecContext*);
+using BinaryOp = Result<Datum>(const Datum&, const Datum&, ExecContext*);
 
 template <UnaryRoundingOp& Op, std::shared_ptr<DataType>& timestamp_type,
           RoundTemporalOptions& options>
@@ -84,6 +85,24 @@ static void BenchmarkTemporal(benchmark::State& state) {
   state.SetItemsProcessed(state.iterations() * array_size);
 }
 
+template <BinaryOp& Op, std::shared_ptr<DataType>& timestamp_type>
+static void BenchmarkTemporalBinary(benchmark::State& state) {
+  RegressionArgs args(state);
+  ExecContext* ctx = default_exec_context();
+
+  const int64_t array_size = args.size / sizeof(timestamp_type);
+
+  auto rand = random::RandomArrayGenerator(kSeed);
+  auto lhs = rand.ArrayOf(timestamp_type, args.size, args.null_proportion);
+  auto rhs = rand.ArrayOf(timestamp_type, args.size, args.null_proportion);
+
+  for (auto _ : state) {
+    ABORT_NOT_OK(Op(lhs, rhs, ctx).status());
+  }
+
+  state.SetItemsProcessed(state.iterations() * array_size);
+}
+
 template <std::shared_ptr<DataType>& timestamp_type>
 static void BenchmarkStrftime(benchmark::State& state) {
   RegressionArgs args(state);
@@ -150,6 +169,10 @@ static void BenchmarkAssumeTimezone(benchmark::State& state) {
 
 auto zoned = timestamp(TimeUnit::NANO, "Pacific/Marquesas");
 auto non_zoned = timestamp(TimeUnit::NANO);
+auto time32_type = time32(TimeUnit::MILLI);
+auto time64_type = time64(TimeUnit::NANO);
+auto date32_type = date32();
+auto date64_type = date64();
 
 #define DECLARE_TEMPORAL_ROUNDING_BENCHMARKS(OPTIONS)                              \
   BENCHMARK_TEMPLATE(BenchmarkTemporalRounding, CeilTemporal, zoned, OPTIONS)      \
@@ -172,6 +195,17 @@ auto non_zoned = timestamp(TimeUnit::NANO);
 #define DECLARE_TEMPORAL_BENCHMARKS_ZONED(OP) \
   BENCHMARK_TEMPLATE(BenchmarkTemporal, OP, zoned)->Apply(SetArgs);
 
+#define DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(OP)             \
+  BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, non_zoned)->Apply(SetArgs);   \
+  BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, zoned)->Apply(SetArgs);       \
+  BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, date64_type)->Apply(SetArgs); \
+  BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, date32_type)->Apply(SetArgs);
+
+#define DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(OP)       \
+  DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(OP);                  \
+  BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, time32_type)->Apply(SetArgs); \
+  BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, time64_type)->Apply(SetArgs);
+
 // Temporal rounding benchmarks
 auto round_1_minute = RoundTemporalOptions(1, CalendarUnit::MINUTE);
 auto round_10_minute = RoundTemporalOptions(10, CalendarUnit::MINUTE);
@@ -214,5 +248,19 @@ BENCHMARK_TEMPLATE(BenchmarkStrptime, non_zoned)->Apply(SetArgs);
 BENCHMARK_TEMPLATE(BenchmarkStrptime, zoned)->Apply(SetArgs);
 BENCHMARK(BenchmarkAssumeTimezone)->Apply(SetArgs);
 
+// binary temporal benchmarks
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(YearsBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(QuartersBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(MonthsBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(MonthDayNanoBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(WeeksBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(DayTimeBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(DaysBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(HoursBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(MinutesBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(SecondsBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(MillisecondsBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(MicrosecondsBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(NanosecondsBetween);
 }  // namespace compute
 }  // namespace arrow