You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2022/06/09 15:56:19 UTC
[arrow] branch master updated: ARROW-16741: [C++] Add Benchmarks for Binary Temporal Operations (#13302)
This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 32054f75ab ARROW-16741: [C++] Add Benchmarks for Binary Temporal Operations (#13302)
32054f75ab is described below
commit 32054f75ab91c66cab82528b0549a2ae3b624ae8
Author: Ivan Chau <iv...@gmail.com>
AuthorDate: Thu Jun 9 11:56:14 2022 -0400
ARROW-16741: [C++] Add Benchmarks for Binary Temporal Operations (#13302)
Add all binary temporal benchmarks and documentation to `api_scalar.h`
Authored-by: Ivan Chau <iv...@gmail.com>
Signed-off-by: David Li <li...@gmail.com>
---
cpp/src/arrow/compute/api_scalar.cc | 14 ++
cpp/src/arrow/compute/api_scalar.h | 157 +++++++++++++++++++++
.../compute/kernels/scalar_temporal_benchmark.cc | 48 +++++++
3 files changed, 219 insertions(+)
diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index cd5b4ce799..e6b0ade315 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -840,6 +840,20 @@ Result<Datum> Week(const Datum& arg, WeekOptions options, ExecContext* ctx) {
return CallFunction("week", {arg}, &options, ctx);
}
+// Binary temporal difference functions declared in api_scalar.h. Each line pairs
+// the C++ entry point with a compute function name. Note that MonthsBetween,
+// MonthDayNanoBetween and DayTimeBetween are paired with "*_interval_between"
+// names rather than following the "<unit>s_between" pattern of the others.
+SCALAR_EAGER_BINARY(YearsBetween, "years_between")
+SCALAR_EAGER_BINARY(QuartersBetween, "quarters_between")
+SCALAR_EAGER_BINARY(MonthsBetween, "month_interval_between")
+SCALAR_EAGER_BINARY(WeeksBetween, "weeks_between")
+SCALAR_EAGER_BINARY(MonthDayNanoBetween, "month_day_nano_interval_between")
+SCALAR_EAGER_BINARY(DayTimeBetween, "day_time_interval_between")
+SCALAR_EAGER_BINARY(DaysBetween, "days_between")
+SCALAR_EAGER_BINARY(HoursBetween, "hours_between")
+SCALAR_EAGER_BINARY(MinutesBetween, "minutes_between")
+SCALAR_EAGER_BINARY(SecondsBetween, "seconds_between")
+SCALAR_EAGER_BINARY(MillisecondsBetween, "milliseconds_between")
+SCALAR_EAGER_BINARY(MicrosecondsBetween, "microseconds_between")
+SCALAR_EAGER_BINARY(NanosecondsBetween, "nanoseconds_between")
+
// ----------------------------------------------------------------------
// Structural transforms
Result<Datum> MapLookup(const Datum& arg, MapLookupOptions options, ExecContext* ctx) {
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 9fb7a94210..7f7b23c937 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -1467,6 +1467,163 @@ ARROW_EXPORT Result<Datum> AssumeTimezone(const Datum& values,
ARROW_EXPORT Result<Datum> IsDaylightSavings(const Datum& values,
ExecContext* ctx = NULLPTR);
+/// \brief Years Between finds the number of years between two values
+///
+/// Paired with the "years_between" compute function name by the
+/// SCALAR_EAGER_BINARY definition in api_scalar.cc.
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> YearsBetween(const Datum& left, const Datum& right,
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Quarters Between finds the number of quarters between two values
+///
+/// Paired with the "quarters_between" compute function name by the
+/// SCALAR_EAGER_BINARY definition in api_scalar.cc.
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> QuartersBetween(const Datum& left, const Datum& right,
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Months Between finds the number of months between two values
+///
+/// Paired with the "month_interval_between" compute function name by the
+/// SCALAR_EAGER_BINARY definition in api_scalar.cc.
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> MonthsBetween(const Datum& left, const Datum& right,
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Weeks Between finds the number of weeks between two values
+///
+/// Paired with the "weeks_between" compute function name by the
+/// SCALAR_EAGER_BINARY definition in api_scalar.cc.
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> WeeksBetween(const Datum& left, const Datum& right,
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Month Day Nano Between finds the number of months, days, and nanoseconds
+/// between two values
+///
+/// Paired with the "month_day_nano_interval_between" compute function name by
+/// the SCALAR_EAGER_BINARY definition in api_scalar.cc.
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> MonthDayNanoBetween(const Datum& left, const Datum& right,
+ ExecContext* ctx = NULLPTR);
+
+/// \brief DayTime Between finds the number of days and milliseconds between two values
+///
+/// Paired with the "day_time_interval_between" compute function name by the
+/// SCALAR_EAGER_BINARY definition in api_scalar.cc.
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> DayTimeBetween(const Datum& left, const Datum& right,
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Days Between finds the number of days between two values
+///
+/// Paired with the "days_between" compute function name by the
+/// SCALAR_EAGER_BINARY definition in api_scalar.cc.
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> DaysBetween(const Datum& left, const Datum& right,
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Hours Between finds the number of hours between two values
+///
+/// Paired with the "hours_between" compute function name by the
+/// SCALAR_EAGER_BINARY definition in api_scalar.cc.
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> HoursBetween(const Datum& left, const Datum& right,
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Minutes Between finds the number of minutes between two values
+///
+/// Paired with the "minutes_between" compute function name by the
+/// SCALAR_EAGER_BINARY definition in api_scalar.cc.
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> MinutesBetween(const Datum& left, const Datum& right,
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Seconds Between finds the number of seconds between two values
+///
+/// Paired with the "seconds_between" compute function name by the
+/// SCALAR_EAGER_BINARY definition in api_scalar.cc.
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> SecondsBetween(const Datum& left, const Datum& right,
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Milliseconds Between finds the number of milliseconds between two values
+///
+/// Paired with the "milliseconds_between" compute function name by the
+/// SCALAR_EAGER_BINARY definition in api_scalar.cc.
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> MillisecondsBetween(const Datum& left, const Datum& right,
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Microseconds Between finds the number of microseconds between two values
+///
+/// Paired with the "microseconds_between" compute function name by the
+/// SCALAR_EAGER_BINARY definition in api_scalar.cc.
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> MicrosecondsBetween(const Datum& left, const Datum& right,
+ ExecContext* ctx = NULLPTR);
+
+/// \brief Nanoseconds Between finds the number of nanoseconds between two values
+///
+/// Paired with the "nanoseconds_between" compute function name by the
+/// SCALAR_EAGER_BINARY definition in api_scalar.cc.
+///
+/// \param[in] left input treated as the start time
+/// \param[in] right input treated as the end time
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 8.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> NanosecondsBetween(const Datum& left, const Datum& right,
+ ExecContext* ctx = NULLPTR);
+
/// \brief Finds either the FIRST, LAST, or ALL items with a key that matches the given
/// query key in a map.
///
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_benchmark.cc
index ddaa502a34..29c2acfd0a 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal_benchmark.cc
@@ -44,6 +44,7 @@ void SetArgs(benchmark::internal::Benchmark* bench) {
using UnaryRoundingOp = Result<Datum>(const Datum&, const RoundTemporalOptions,
ExecContext*);
using UnaryOp = Result<Datum>(const Datum&, ExecContext*);
+// Signature of a binary function taking two Datum inputs and an ExecContext
+// (e.g. YearsBetween); BenchmarkTemporalBinary takes one as a template argument.
+using BinaryOp = Result<Datum>(const Datum&, const Datum&, ExecContext*);
template <UnaryRoundingOp& Op, std::shared_ptr<DataType>& timestamp_type,
RoundTemporalOptions& options>
@@ -84,6 +85,24 @@ static void BenchmarkTemporal(benchmark::State& state) {
state.SetItemsProcessed(state.iterations() * array_size);
}
+/// \brief Benchmark a binary temporal function on two random arrays of one type.
+///
+/// Both inputs are generated once, outside the timed loop, from the same seeded
+/// generator; only the call to Op is measured.
+///
+/// \tparam Op the binary function to measure (e.g. YearsBetween)
+/// \tparam timestamp_type the data type used for both generated inputs
+/// \param[in,out] state the benchmark state supplying size and null proportion
+template <BinaryOp& Op, std::shared_ptr<DataType>& timestamp_type>
+static void BenchmarkTemporalBinary(benchmark::State& state) {
+  RegressionArgs args(state);
+  ExecContext* ctx = default_exec_context();
+
+  // NOTE(review): args.size is used here as an element count; confirm whether
+  // RegressionArgs intends it to be a byte budget instead.
+  auto rand = random::RandomArrayGenerator(kSeed);
+  auto lhs = rand.ArrayOf(timestamp_type, args.size, args.null_proportion);
+  auto rhs = rand.ArrayOf(timestamp_type, args.size, args.null_proportion);
+
+  for (auto _ : state) {
+    ABORT_NOT_OK(Op(lhs, rhs, ctx).status());
+  }
+
+  // Report the number of elements actually handled per iteration. Dividing
+  // args.size by sizeof(timestamp_type) would divide by the size of the
+  // std::shared_ptr handle, not by an element width, and would not match the
+  // length the inputs were generated with.
+  state.SetItemsProcessed(state.iterations() * lhs->length());
+}
+
template <std::shared_ptr<DataType>& timestamp_type>
static void BenchmarkStrftime(benchmark::State& state) {
RegressionArgs args(state);
@@ -150,6 +169,10 @@ static void BenchmarkAssumeTimezone(benchmark::State& state) {
auto zoned = timestamp(TimeUnit::NANO, "Pacific/Marquesas");
auto non_zoned = timestamp(TimeUnit::NANO);
+// Additional input types for the binary temporal benchmarks. They are
+// namespace-scope variables so that they can be passed as the
+// std::shared_ptr<DataType>& template argument of BenchmarkTemporalBinary.
+auto time32_type = time32(TimeUnit::MILLI);
+auto time64_type = time64(TimeUnit::NANO);
+auto date32_type = date32();
+auto date64_type = date64();
#define DECLARE_TEMPORAL_ROUNDING_BENCHMARKS(OPTIONS) \
BENCHMARK_TEMPLATE(BenchmarkTemporalRounding, CeilTemporal, zoned, OPTIONS) \
@@ -172,6 +195,17 @@ auto non_zoned = timestamp(TimeUnit::NANO);
#define DECLARE_TEMPORAL_BENCHMARKS_ZONED(OP) \
BENCHMARK_TEMPLATE(BenchmarkTemporal, OP, zoned)->Apply(SetArgs);
+// Registers BenchmarkTemporalBinary for OP on four input types: non-zoned and
+// zoned timestamps, date64 and date32. Each registration applies SetArgs.
+#define DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(OP) \
+ BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, non_zoned)->Apply(SetArgs); \
+ BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, zoned)->Apply(SetArgs); \
+ BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, date64_type)->Apply(SetArgs); \
+ BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, date32_type)->Apply(SetArgs);
+
+// Registers everything DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS
+// does for OP, plus BenchmarkTemporalBinary on time32 and time64 inputs.
+#define DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(OP) \
+ DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(OP); \
+ BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, time32_type)->Apply(SetArgs); \
+ BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, time64_type)->Apply(SetArgs);
+
// Temporal rounding benchmarks
auto round_1_minute = RoundTemporalOptions(1, CalendarUnit::MINUTE);
auto round_10_minute = RoundTemporalOptions(10, CalendarUnit::MINUTE);
@@ -214,5 +248,19 @@ BENCHMARK_TEMPLATE(BenchmarkStrptime, non_zoned)->Apply(SetArgs);
BENCHMARK_TEMPLATE(BenchmarkStrptime, zoned)->Apply(SetArgs);
BENCHMARK(BenchmarkAssumeTimezone)->Apply(SetArgs);
+// Binary temporal benchmarks.
+// Functions registered through the ..._DATES_AND_TIMESTAMPS macro run on
+// timestamp and date inputs only; those registered through the
+// ..._DATES_TIMES_AND_TIMESTAMPS macro additionally run on time32/time64 inputs.
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(YearsBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(QuartersBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(MonthsBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(MonthDayNanoBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(WeeksBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(DayTimeBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(DaysBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(HoursBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(MinutesBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(SecondsBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(MillisecondsBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(MicrosecondsBetween);
+DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(NanosecondsBetween);
} // namespace compute
} // namespace arrow