You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2022/06/02 12:55:52 UTC
[arrow] branch master updated: ARROW-14821: [C++] Add ceil_is_strictly_greater and calendar_based_origin temporal round options (to mimic lubridate's date rounding) (#12657)
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new fc082c5e89 ARROW-14821: [C++] Add ceil_is_strictly_greater and calendar_based_origin temporal round options (to mimic lubridate's date rounding) (#12657)
fc082c5e89 is described below
commit fc082c5e8982f9db8366feaab25d6449b273d35e
Author: Rok Mihevc <ro...@mihevc.org>
AuthorDate: Thu Jun 2 14:55:44 2022 +0200
ARROW-14821: [C++] Add ceil_is_strictly_greater and calendar_based_origin temporal round options (to mimic lubridate's date rounding) (#12657)
This implements `RoundTemporalOptions.ceil_is_strictly_greater` and `RoundTemporalOptions.calendar_based_origin` parameters in C++ to enable temporal rounding in R ([ARROW-14821](https://issues.apache.org/jira/browse/ARROW-14821)).
Lead-authored-by: Rok <ro...@mihevc.org>
Co-authored-by: Rok Mihevc <ro...@mihevc.org>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
cpp/src/arrow/compute/api_scalar.cc | 13 +-
cpp/src/arrow/compute/api_scalar.h | 23 +-
.../arrow/compute/kernels/scalar_temporal_test.cc | 617 +++++++++++++++++++++
.../arrow/compute/kernels/scalar_temporal_unary.cc | 264 ++++++---
python/pyarrow/_compute.pyx | 45 +-
python/pyarrow/includes/libarrow.pxd | 6 +-
python/pyarrow/tests/test_compute.py | 53 +-
7 files changed, 918 insertions(+), 103 deletions(-)
diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index a9e2565a3e..cd5b4ce799 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -332,7 +332,10 @@ static auto kRoundOptionsType = GetFunctionOptionsType<RoundOptions>(
static auto kRoundTemporalOptionsType = GetFunctionOptionsType<RoundTemporalOptions>(
DataMember("multiple", &RoundTemporalOptions::multiple),
DataMember("unit", &RoundTemporalOptions::unit),
- DataMember("week_starts_monday", &RoundTemporalOptions::week_starts_monday));
+ DataMember("week_starts_monday", &RoundTemporalOptions::week_starts_monday),
+ DataMember("ceil_is_strictly_greater",
+ &RoundTemporalOptions::ceil_is_strictly_greater),
+ DataMember("calendar_based_origin", &RoundTemporalOptions::calendar_based_origin));
static auto kRoundToMultipleOptionsType = GetFunctionOptionsType<RoundToMultipleOptions>(
DataMember("multiple", &RoundToMultipleOptions::multiple),
DataMember("round_mode", &RoundToMultipleOptions::round_mode));
@@ -491,11 +494,15 @@ RoundOptions::RoundOptions(int64_t ndigits, RoundMode round_mode)
constexpr char RoundOptions::kTypeName[];
RoundTemporalOptions::RoundTemporalOptions(int multiple, CalendarUnit unit,
- bool week_starts_monday)
+ bool week_starts_monday,
+ bool ceil_is_strictly_greater,
+ bool calendar_based_origin)
: FunctionOptions(internal::kRoundTemporalOptionsType),
multiple(std::move(multiple)),
unit(unit),
- week_starts_monday(week_starts_monday) {}
+ week_starts_monday(week_starts_monday),
+ ceil_is_strictly_greater(ceil_is_strictly_greater),
+ calendar_based_origin(calendar_based_origin) {}
constexpr char RoundTemporalOptions::kTypeName[];
RoundToMultipleOptions::RoundToMultipleOptions(double multiple, RoundMode round_mode)
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 0af591acfa..9fb7a94210 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -107,7 +107,9 @@ enum class CalendarUnit : int8_t {
class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions {
public:
explicit RoundTemporalOptions(int multiple = 1, CalendarUnit unit = CalendarUnit::DAY,
- bool week_starts_monday = true);
+ bool week_starts_monday = true,
+ bool ceil_is_strictly_greater = false,
+ bool calendar_based_origin = false);
static constexpr char const kTypeName[] = "RoundTemporalOptions";
static RoundTemporalOptions Defaults() { return RoundTemporalOptions(); }
@@ -117,6 +119,25 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions {
CalendarUnit unit;
/// What day does the week start with (Monday=true, Sunday=false)
bool week_starts_monday;
+ /// Enable this flag to return a rounded value that is strictly greater than the input.
+ /// For example: ceiling 1970-01-01T00:00:00 to 3 hours would yield 1970-01-01T03:00:00
+ /// if set to true and 1970-01-01T00:00:00 if set to false.
+ /// This applies for ceiling only.
+ bool ceil_is_strictly_greater;
+ /// By default time is rounded to a multiple of units since 1970-01-01T00:00:00.
+ /// By setting calendar_based_origin to true, time will be rounded to a number
+ /// of units since the last greater calendar unit.
+ /// For example: rounding to a multiple of days since the beginning of the month or
+ /// to hours since the beginning of the day.
+ /// Exceptions: week and quarter are not used as greater units, therefore days
+ /// will be rounded to the beginning of the month, not week. Greater unit of week
+ /// is year.
+ /// Note that ceiling and rounding might change sorting order of an array near greater
+ /// unit change. For example rounding YYYY-mm-dd 23:00:00 to 5 hours will ceil and
+ /// round to YYYY-mm-dd+1 01:00:00 and floor to YYYY-mm-dd 20:00:00. On the other hand
+ /// YYYY-mm-dd+1 00:00:00 will ceil, round and floor to YYYY-mm-dd+1 00:00:00. This
+ /// can break the order of an already ordered array.
+ bool calendar_based_origin;
};
class ARROW_EXPORT RoundToMultipleOptions : public FunctionOptions {
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
index a52d69c36c..45bd7819c4 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
@@ -2229,6 +2229,370 @@ TEST_F(ScalarTemporalTest, TestCeilTemporal) {
CheckScalarUnary(op, unit, times, unit, ceil_15_years, &round_to_15_years);
}
+TEST_F(ScalarTemporalTest, TestCeilTemporalStrictCeil) {
+ std::string op = "ceil_temporal";
+ RoundTemporalOptions round_to_1_nanoseconds =
+ RoundTemporalOptions(1, CalendarUnit::NANOSECOND, true, true, false);
+ RoundTemporalOptions round_to_1_microseconds =
+ RoundTemporalOptions(1, CalendarUnit::MICROSECOND, true, true, false);
+ RoundTemporalOptions round_to_1_milliseconds =
+ RoundTemporalOptions(1, CalendarUnit::MILLISECOND, true, true, false);
+ RoundTemporalOptions round_to_1_seconds =
+ RoundTemporalOptions(1, CalendarUnit::SECOND, true, true, false);
+ RoundTemporalOptions round_to_1_minutes =
+ RoundTemporalOptions(1, CalendarUnit::MINUTE, true, true, false);
+ RoundTemporalOptions round_to_1_hours =
+ RoundTemporalOptions(1, CalendarUnit::HOUR, true, true, false);
+ RoundTemporalOptions round_to_1_days =
+ RoundTemporalOptions(1, CalendarUnit::DAY, true, true, false);
+ RoundTemporalOptions round_to_1_weeks =
+ RoundTemporalOptions(1, CalendarUnit::WEEK, true, true, false);
+ RoundTemporalOptions round_to_1_weeks_sunday =
+ RoundTemporalOptions(1, CalendarUnit::WEEK, false, true, false);
+ RoundTemporalOptions round_to_1_months =
+ RoundTemporalOptions(1, CalendarUnit::MONTH, true, true, false);
+ RoundTemporalOptions round_to_1_quarters =
+ RoundTemporalOptions(1, CalendarUnit::QUARTER, true, true, false);
+ RoundTemporalOptions round_to_1_years =
+ RoundTemporalOptions(1, CalendarUnit::YEAR, true, true, false);
+
+ RoundTemporalOptions round_to_15_nanoseconds =
+ RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, false);
+ RoundTemporalOptions round_to_15_microseconds =
+ RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, false);
+ RoundTemporalOptions round_to_15_milliseconds =
+ RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, false);
+ RoundTemporalOptions round_to_13_seconds =
+ RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, false);
+ RoundTemporalOptions round_to_13_minutes =
+ RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, false);
+ RoundTemporalOptions round_to_15_hours =
+ RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, false);
+ RoundTemporalOptions round_to_15_days =
+ RoundTemporalOptions(15, CalendarUnit::DAY, true, true, false);
+ RoundTemporalOptions round_to_15_weeks =
+ RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, false);
+ RoundTemporalOptions round_to_15_weeks_sunday =
+ RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, false);
+ RoundTemporalOptions round_to_15_months =
+ RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, false);
+ RoundTemporalOptions round_to_15_quarters =
+ RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, false);
+ RoundTemporalOptions round_to_15_years =
+ RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, false);
+
+ const char* ceil_1_nanosecond =
+ R"(["1970-01-01 00:00:59.123456790", "2000-02-29 23:23:24.000000000",
+ "1899-01-01 00:59:20.001001002", "2033-05-18 03:33:20.000000001",
+ "2020-01-01 01:05:05.001000001", "2019-12-31 02:10:10.002000001",
+ "2019-12-30 03:15:15.003000001", "2009-12-31 04:20:20.004132001",
+ "2010-01-01 05:25:25.005321001", "2010-01-03 06:30:30.006163001",
+ "2010-01-04 07:35:35.000000001", "2006-01-01 08:40:40.000000001",
+ "2005-12-31 09:45:45.000000001", "2008-12-28 00:00:00.000000001",
+ "2008-12-29 00:00:00.000000001", "2012-01-01 01:02:03.000000001", null])";
+ const char* ceil_1_microsecond =
+ R"(["1970-01-01 00:00:59.123457", "2000-02-29 23:23:24.000000",
+ "1899-01-01 00:59:20.001002", "2033-05-18 03:33:20.000001",
+ "2020-01-01 01:05:05.001001", "2019-12-31 02:10:10.002001",
+ "2019-12-30 03:15:15.003001", "2009-12-31 04:20:20.004133",
+ "2010-01-01 05:25:25.005322", "2010-01-03 06:30:30.006164",
+ "2010-01-04 07:35:35.000001", "2006-01-01 08:40:40.000001",
+ "2005-12-31 09:45:45.000001", "2008-12-28 00:00:00.000001",
+ "2008-12-29 00:00:00.000001", "2012-01-01 01:02:03.000001", null])";
+ const char* ceil_1_millisecond =
+ R"(["1970-01-01 00:00:59.124", "2000-02-29 23:23:24.000",
+ "1899-01-01 00:59:20.002", "2033-05-18 03:33:20.001",
+ "2020-01-01 01:05:05.002", "2019-12-31 02:10:10.003",
+ "2019-12-30 03:15:15.004", "2009-12-31 04:20:20.005",
+ "2010-01-01 05:25:25.006", "2010-01-03 06:30:30.007",
+ "2010-01-04 07:35:35.001", "2006-01-01 08:40:40.001",
+ "2005-12-31 09:45:45.001", "2008-12-28 00:00:00.001",
+ "2008-12-29 00:00:00.001", "2012-01-01 01:02:03.001", null])";
+ const char* ceil_1_second =
+ R"(["1970-01-01 00:01:00", "2000-02-29 23:23:24", "1899-01-01 00:59:21",
+ "2033-05-18 03:33:21", "2020-01-01 01:05:06", "2019-12-31 02:10:11",
+ "2019-12-30 03:15:16", "2009-12-31 04:20:21", "2010-01-01 05:25:26",
+ "2010-01-03 06:30:31", "2010-01-04 07:35:36", "2006-01-01 08:40:41",
+ "2005-12-31 09:45:46", "2008-12-28 00:00:01", "2008-12-29 00:00:01",
+ "2012-01-01 01:02:04", null])";
+ const char* ceil_1_minute =
+ R"(["1970-01-01 00:01:00", "2000-02-29 23:24:00", "1899-01-01 01:00:00",
+ "2033-05-18 03:34:00", "2020-01-01 01:06:00", "2019-12-31 02:11:00",
+ "2019-12-30 03:16:00", "2009-12-31 04:21:00", "2010-01-01 05:26:00",
+ "2010-01-03 06:31:00", "2010-01-04 07:36:00", "2006-01-01 08:41:00",
+ "2005-12-31 09:46:00", "2008-12-28 00:01:00", "2008-12-29 00:01:00",
+ "2012-01-01 01:03:00", null])";
+ const char* ceil_1_hour =
+ R"(["1970-01-01 01:00:00", "2000-03-01 00:00:00", "1899-01-01 01:00:00",
+ "2033-05-18 04:00:00", "2020-01-01 02:00:00", "2019-12-31 03:00:00",
+ "2019-12-30 04:00:00", "2009-12-31 05:00:00", "2010-01-01 06:00:00",
+ "2010-01-03 07:00:00", "2010-01-04 08:00:00", "2006-01-01 09:00:00",
+ "2005-12-31 10:00:00", "2008-12-28 01:00:00", "2008-12-29 01:00:00",
+ "2012-01-01 02:00:00", null])";
+ const char* ceil_1_day =
+ R"(["1970-01-02", "2000-03-01", "1899-01-02", "2033-05-19",
+ "2020-01-02", "2020-01-01", "2019-12-31", "2010-01-01",
+ "2010-01-02", "2010-01-04", "2010-01-05", "2006-01-02",
+ "2006-01-01", "2008-12-29", "2008-12-30", "2012-01-02", null])";
+ const char* ceil_1_weeks =
+ R"(["1970-01-05", "2000-03-06", "1899-01-02", "2033-05-23",
+ "2020-01-06", "2020-01-06", "2020-01-06", "2010-01-04",
+ "2010-01-04", "2010-01-04", "2010-01-11", "2006-01-02",
+ "2006-01-02", "2008-12-29", "2009-01-05", "2012-01-02", null])";
+ const char* ceil_1_weeks_sunday =
+ R"(["1970-01-04", "2000-03-05", "1899-01-08", "2033-05-22",
+ "2020-01-05", "2020-01-05", "2020-01-05", "2010-01-03",
+ "2010-01-03", "2010-01-10", "2010-01-10", "2006-01-08",
+ "2006-01-01", "2009-01-04", "2009-01-04", "2012-01-08", null])";
+ const char* ceil_1_months =
+ R"(["1970-02-01", "2000-03-01", "1899-02-01", "2033-06-01",
+ "2020-02-01", "2020-01-01", "2020-01-01", "2010-01-01",
+ "2010-02-01", "2010-02-01", "2010-02-01", "2006-02-01",
+ "2006-01-01", "2009-01-01", "2009-01-01", "2012-02-01", null])";
+ const char* ceil_1_quarters =
+ R"(["1970-04-01", "2000-04-01", "1899-04-01", "2033-07-01",
+ "2020-04-01", "2020-01-01", "2020-01-01", "2010-01-01",
+ "2010-04-01", "2010-04-01", "2010-04-01", "2006-04-01",
+ "2006-01-01", "2009-01-01", "2009-01-01", "2012-04-01", null])";
+ const char* ceil_1_years =
+ R"(["1971-01-01", "2001-01-01", "1900-01-01", "2034-01-01",
+ "2021-01-01", "2020-01-01", "2020-01-01", "2010-01-01",
+ "2011-01-01", "2011-01-01", "2011-01-01", "2007-01-01",
+ "2006-01-01", "2009-01-01", "2009-01-01", "2013-01-01", null])";
+
+ const char* ceil_15_nanosecond =
+ R"(["1970-01-01 00:00:59.123456790", "2000-02-29 23:23:24.000000000",
+ "1899-01-01 00:59:20.001001005", "2033-05-18 03:33:20.000000010",
+ "2020-01-01 01:05:05.001000015", "2019-12-31 02:10:10.002000015",
+ "2019-12-30 03:15:15.003000015", "2009-12-31 04:20:20.004132015",
+ "2010-01-01 05:25:25.005321015", "2010-01-03 06:30:30.006163005",
+ "2010-01-04 07:35:35.000000010", "2006-01-01 08:40:40.000000005",
+ "2005-12-31 09:45:45.000000015", "2008-12-28 00:00:00.000000015",
+ "2008-12-29 00:00:00.000000015", "2012-01-01 01:02:03.000000015", null])";
+ const char* ceil_15_microsecond =
+ R"(["1970-01-01 00:00:59.123460", "2000-02-29 23:23:24.000000",
+ "1899-01-01 00:59:20.001015", "2033-05-18 03:33:20.000010",
+ "2020-01-01 01:05:05.001015", "2019-12-31 02:10:10.002015",
+ "2019-12-30 03:15:15.003015", "2009-12-31 04:20:20.004135",
+ "2010-01-01 05:25:25.005330", "2010-01-03 06:30:30.006165",
+ "2010-01-04 07:35:35.000010", "2006-01-01 08:40:40.000005",
+ "2005-12-31 09:45:45.000015", "2008-12-28 00:00:00.000015",
+ "2008-12-29 00:00:00.000015", "2012-01-01 01:02:03.000015", null])";
+ const char* ceil_15_millisecond =
+ R"(["1970-01-01 00:00:59.130", "2000-02-29 23:23:24.000",
+ "1899-01-01 00:59:20.010", "2033-05-18 03:33:20.010",
+ "2020-01-01 01:05:05.010", "2019-12-31 02:10:10.005",
+ "2019-12-30 03:15:15.015", "2009-12-31 04:20:20.010",
+ "2010-01-01 05:25:25.020", "2010-01-03 06:30:30.015",
+ "2010-01-04 07:35:35.010", "2006-01-01 08:40:40.005",
+ "2005-12-31 09:45:45.015", "2008-12-28 00:00:00.015",
+ "2008-12-29 00:00:00.015", "2012-01-01 01:02:03.015", null])";
+ const char* ceil_13_second =
+ R"(["1970-01-01 00:01:05", "2000-02-29 23:23:24", "1899-01-01 00:59:29",
+ "2033-05-18 03:33:22", "2020-01-01 01:05:06", "2019-12-31 02:10:21",
+ "2019-12-30 03:15:23", "2009-12-31 04:20:32", "2010-01-01 05:25:30",
+ "2010-01-03 06:30:39", "2010-01-04 07:35:37", "2006-01-01 08:40:53",
+ "2005-12-31 09:45:55", "2008-12-28 00:00:01", "2008-12-29 00:00:12",
+ "2012-01-01 01:02:11", null])";
+ const char* ceil_13_minute =
+ R"(["1970-01-01 00:13:00", "2000-02-29 23:26:00", "1899-01-01 01:01:00",
+ "2033-05-18 03:39:00", "2020-01-01 01:09:00", "2019-12-31 02:11:00",
+ "2019-12-30 03:26:00", "2009-12-31 04:24:00", "2010-01-01 05:32:00",
+ "2010-01-03 06:43:00", "2010-01-04 07:38:00", "2006-01-01 08:45:00",
+ "2005-12-31 09:47:00", "2008-12-28 00:05:00", "2008-12-29 00:08:00",
+ "2012-01-01 01:05:00", null])";
+ const char* ceil_15_hour =
+ R"(["1970-01-01 15:00:00", "2000-03-01 12:00:00", "1899-01-01 03:00:00",
+ "2033-05-18 18:00:00", "2020-01-01 12:00:00", "2019-12-31 06:00:00",
+ "2019-12-30 15:00:00", "2009-12-31 09:00:00", "2010-01-01 15:00:00",
+ "2010-01-03 12:00:00", "2010-01-04 18:00:00", "2006-01-01 09:00:00",
+ "2005-12-31 18:00:00", "2008-12-28 06:00:00", "2008-12-29 12:00:00",
+ "2012-01-01 15:00:00", null])";
+ const char* ceil_15_day =
+ R"(["1970-01-16", "2000-03-09", "1899-01-13", "2033-05-30", "2020-01-09",
+ "2020-01-09", "2020-01-09", "2010-01-01", "2010-01-16", "2010-01-16",
+ "2010-01-16", "2006-01-07", "2006-01-07", "2009-01-06", "2009-01-06",
+ "2012-01-06", null])";
+ const char* ceil_15_weeks =
+ R"(["1970-04-13", "2000-03-06", "1899-04-10", "2033-07-11", "2020-01-06",
+ "2020-01-06", "2020-01-06", "2010-03-29", "2010-03-29", "2010-03-29",
+ "2010-03-29", "2006-03-20", "2006-03-20", "2009-02-02", "2009-02-02",
+ "2012-04-02", null])";
+ const char* ceil_15_weeks_sunday =
+ R"(["1970-04-12", "2000-03-05", "1899-04-09", "2033-07-10", "2020-01-05",
+ "2020-01-05", "2020-01-05", "2010-03-28", "2010-03-28", "2010-03-28",
+ "2010-03-28", "2006-03-19", "2006-03-19", "2009-02-01", "2009-02-01",
+ "2012-04-01", null])";
+ const char* ceil_15_months =
+ R"(["1971-04-01", "2001-04-01", "1900-01-01", "2033-10-01", "2021-04-01",
+ "2020-01-01", "2020-01-01", "2010-01-01", "2011-04-01", "2011-04-01",
+ "2011-04-01", "2006-04-01", "2006-04-01", "2010-01-01", "2010-01-01",
+ "2012-07-01", null])";
+ const char* ceil_15_quarters =
+ R"(["1973-10-01", "2003-10-01", "1902-07-01", "2033-10-01", "2022-07-01",
+ "2022-07-01", "2022-07-01", "2011-04-01", "2011-04-01", "2011-04-01",
+ "2011-04-01", "2007-07-01", "2007-07-01", "2011-04-01", "2011-04-01",
+ "2015-01-01", null])";
+ const char* ceil_15_years =
+ R"(["1980-01-01", "2010-01-01", "1905-01-01", "2040-01-01", "2025-01-01",
+ "2025-01-01", "2025-01-01", "2010-01-01", "2025-01-01", "2025-01-01",
+ "2025-01-01", "2010-01-01", "2010-01-01", "2010-01-01", "2010-01-01",
+ "2025-01-01", null])";
+
+ auto unit = timestamp(TimeUnit::NANO, "UTC");
+ CheckScalarUnary(op, unit, times, unit, ceil_1_nanosecond, &round_to_1_nanoseconds);
+ CheckScalarUnary(op, unit, times, unit, ceil_1_microsecond, &round_to_1_microseconds);
+ CheckScalarUnary(op, unit, times, unit, ceil_1_millisecond, &round_to_1_milliseconds);
+ CheckScalarUnary(op, unit, times, unit, ceil_1_second, &round_to_1_seconds);
+ CheckScalarUnary(op, unit, times, unit, ceil_1_minute, &round_to_1_minutes);
+ CheckScalarUnary(op, unit, times, unit, ceil_1_hour, &round_to_1_hours);
+ CheckScalarUnary(op, unit, times, unit, ceil_1_day, &round_to_1_days);
+ CheckScalarUnary(op, unit, times, unit, ceil_1_weeks, &round_to_1_weeks);
+ CheckScalarUnary(op, unit, times, unit, ceil_1_weeks_sunday, &round_to_1_weeks_sunday);
+ CheckScalarUnary(op, unit, times, unit, ceil_1_months, &round_to_1_months);
+ CheckScalarUnary(op, unit, times, unit, ceil_1_quarters, &round_to_1_quarters);
+ CheckScalarUnary(op, unit, times, unit, ceil_1_years, &round_to_1_years);
+
+ CheckScalarUnary(op, unit, times, unit, ceil_15_nanosecond, &round_to_15_nanoseconds);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_microsecond, &round_to_15_microseconds);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_millisecond, &round_to_15_milliseconds);
+ CheckScalarUnary(op, unit, times, unit, ceil_13_second, &round_to_13_seconds);
+ CheckScalarUnary(op, unit, times, unit, ceil_13_minute, &round_to_13_minutes);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_hour, &round_to_15_hours);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_day, &round_to_15_days);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_weeks, &round_to_15_weeks);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_weeks_sunday,
+ &round_to_15_weeks_sunday);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_months, &round_to_15_months);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_quarters, &round_to_15_quarters);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_years, &round_to_15_years);
+}
+
+TEST_F(ScalarTemporalTest, TestCeilTemporalMultipleSinceGreaterUnit) {
+ std::string op = "ceil_temporal";
+ RoundTemporalOptions round_to_15_nanoseconds =
+ RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, true);
+ RoundTemporalOptions round_to_15_microseconds =
+ RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, true);
+ RoundTemporalOptions round_to_15_milliseconds =
+ RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, true);
+ RoundTemporalOptions round_to_13_seconds =
+ RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, true);
+ RoundTemporalOptions round_to_13_minutes =
+ RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, true);
+ RoundTemporalOptions round_to_15_hours =
+ RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, true);
+ RoundTemporalOptions round_to_15_days =
+ RoundTemporalOptions(15, CalendarUnit::DAY, true, true, true);
+ RoundTemporalOptions round_to_15_weeks =
+ RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, true);
+ RoundTemporalOptions round_to_15_weeks_sunday =
+ RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, true);
+ RoundTemporalOptions round_to_15_months =
+ RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, true);
+ RoundTemporalOptions round_to_15_quarters =
+ RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, true);
+ RoundTemporalOptions round_to_15_years =
+ RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, true);
+
+ // Data for tests below was generated via lubridate with the exception
+ // of week data because lubridate currently does not support rounding to
+ // multiple of week.
+ const char* ceil_15_nanosecond =
+ R"(["1970-01-01 00:00:59.123456795", "2000-02-29 23:23:24.000000005",
+ "1899-01-01 00:59:20.001001015", "2033-05-18 03:33:20.000000015",
+ "2020-01-01 01:05:05.001000015", "2019-12-31 02:10:10.002000015",
+ "2019-12-30 03:15:15.003000015", "2009-12-31 04:20:20.004132015",
+ "2010-01-01 05:25:25.005321015", "2010-01-03 06:30:30.006163015",
+ "2010-01-04 07:35:35.000000015", "2006-01-01 08:40:40.000000015",
+ "2005-12-31 09:45:45.000000015", "2008-12-28 00:00:00.000000015",
+ "2008-12-29 00:00:00.000000015", "2012-01-01 01:02:03.000000015", null])";
+ const char* ceil_15_microsecond =
+ R"(["1970-01-01 00:00:59.123465", "2000-02-29 23:23:24.000005",
+ "1899-01-01 00:59:20.001015", "2033-05-18 03:33:20.000015",
+ "2020-01-01 01:05:05.001015", "2019-12-31 02:10:10.002015",
+ "2019-12-30 03:15:15.003015", "2009-12-31 04:20:20.004135",
+ "2010-01-01 05:25:25.005330", "2010-01-03 06:30:30.006165",
+ "2010-01-04 07:35:35.000015", "2006-01-01 08:40:40.000015",
+ "2005-12-31 09:45:45.000015", "2008-12-28 00:00:00.000015",
+ "2008-12-29 00:00:00.000015", "2012-01-01 01:02:03.000015", null])";
+ const char* ceil_15_millisecond =
+ R"(["1970-01-01 00:00:59.135", "2000-02-29 23:23:24.005",
+ "1899-01-01 00:59:20.015", "2033-05-18 03:33:20.015",
+ "2020-01-01 01:05:05.015", "2019-12-31 02:10:10.015",
+ "2019-12-30 03:15:15.015", "2009-12-31 04:20:20.015",
+ "2010-01-01 05:25:25.015", "2010-01-03 06:30:30.015",
+ "2010-01-04 07:35:35.015", "2006-01-01 08:40:40.015",
+ "2005-12-31 09:45:45.015", "2008-12-28 00:00:00.015",
+ "2008-12-29 00:00:00.015", "2012-01-01 01:02:03.015", null])";
+ const char* ceil_13_second =
+ R"(["1970-01-01 00:01:05", "2000-02-29 23:23:26", "1899-01-01 00:59:26",
+ "2033-05-18 03:33:26", "2020-01-01 01:05:13", "2019-12-31 02:10:13",
+ "2019-12-30 03:15:26", "2009-12-31 04:20:26", "2010-01-01 05:25:26",
+ "2010-01-03 06:30:39", "2010-01-04 07:35:39", "2006-01-01 08:40:52",
+ "2005-12-31 09:45:52", "2008-12-28 00:00:13", "2008-12-29 00:00:13",
+ "2012-01-01 01:02:13", null])";
+ const char* ceil_13_minute =
+ R"(["1970-01-01 00:13:00", "2000-02-29 23:26:00", "1899-01-01 01:05:00",
+ "2033-05-18 03:39:00", "2020-01-01 01:13:00", "2019-12-31 02:13:00",
+ "2019-12-30 03:26:00", "2009-12-31 04:26:00", "2010-01-01 05:26:00",
+ "2010-01-03 06:39:00", "2010-01-04 07:39:00", "2006-01-01 08:52:00",
+ "2005-12-31 09:52:00", "2008-12-28 00:13:00", "2008-12-29 00:13:00",
+ "2012-01-01 01:13:00", null])";
+ const char* ceil_15_hour =
+ R"(["1970-01-01 15:00:00", "2000-03-01 06:00:00", "1899-01-01 15:00:00",
+ "2033-05-18 15:00:00", "2020-01-01 15:00:00", "2019-12-31 15:00:00",
+ "2019-12-30 15:00:00", "2009-12-31 15:00:00", "2010-01-01 15:00:00",
+ "2010-01-03 15:00:00", "2010-01-04 15:00:00", "2006-01-01 15:00:00",
+ "2005-12-31 15:00:00", "2008-12-28 15:00:00", "2008-12-29 15:00:00",
+ "2012-01-01 15:00:00", null])";
+ const char* ceil_15_day =
+ R"(["1970-01-16", "2000-03-02", "1899-01-16", "2033-05-31",
+ "2020-01-16", "2020-01-15", "2019-12-31", "2010-01-15",
+ "2010-01-16", "2010-01-16", "2010-01-16", "2006-01-16",
+ "2006-01-15", "2008-12-31", "2008-12-31", "2012-01-16", null])";
+ const char* ceil_15_weeks =
+ R"(["1970-04-13", "2000-04-17", "1899-04-17", "2033-08-01", "2020-04-13",
+ "2020-04-13", "2020-04-13", "2010-04-19", "2010-04-19", "2010-04-19",
+ "2010-04-19", "2006-04-17", "2006-04-17", "2009-02-23", "2009-04-13",
+ "2012-04-16", null])";
+ const char* ceil_15_weeks_sunday =
+ R"(["1970-04-19", "2000-04-16", "1899-04-16", "2033-07-31", "2020-04-12",
+ "2020-04-12", "2020-04-12", "2010-04-18", "2010-04-18", "2010-04-18",
+ "2010-04-18", "2006-04-16", "2006-04-16", "2009-04-19", "2009-04-19",
+ "2012-04-15", null])";
+ const char* ceil_15_months =
+ R"(["1971-04-01", "2001-04-01", "1900-04-01", "2034-04-01",
+ "2021-04-01", "2020-04-01", "2020-04-01", "2010-04-01",
+ "2011-04-01", "2011-04-01", "2011-04-01", "2007-04-01",
+ "2006-04-01", "2009-04-01", "2009-04-01", "2013-04-01", null])";
+ const char* ceil_15_quarters =
+ R"(["1973-10-01", "2003-10-01", "1902-10-01", "2036-10-01",
+ "2023-10-01", "2022-10-01", "2022-10-01", "2012-10-01",
+ "2013-10-01", "2013-10-01", "2013-10-01", "2009-10-01",
+ "2008-10-01", "2011-10-01", "2011-10-01", "2015-10-01", null])";
+ const char* ceil_15_years =
+ R"(["1980-01-01", "2010-01-01", "1905-01-01", "2040-01-01",
+ "2025-01-01", "2025-01-01", "2025-01-01", "2010-01-01",
+ "2025-01-01", "2025-01-01", "2025-01-01", "2010-01-01",
+ "2010-01-01", "2010-01-01", "2010-01-01", "2025-01-01", null])";
+
+ auto unit = timestamp(TimeUnit::NANO, "UTC");
+ CheckScalarUnary(op, unit, times, unit, ceil_15_nanosecond, &round_to_15_nanoseconds);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_microsecond, &round_to_15_microseconds);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_millisecond, &round_to_15_milliseconds);
+ CheckScalarUnary(op, unit, times, unit, ceil_13_second, &round_to_13_seconds);
+ CheckScalarUnary(op, unit, times, unit, ceil_13_minute, &round_to_13_minutes);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_hour, &round_to_15_hours);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_day, &round_to_15_days);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_weeks, &round_to_15_weeks);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_weeks_sunday,
+ &round_to_15_weeks_sunday);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_months, &round_to_15_months);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_quarters, &round_to_15_quarters);
+ CheckScalarUnary(op, unit, times, unit, ceil_15_years, &round_to_15_years);
+}
+
TEST_F(ScalarTemporalTest, TestFloorTemporal) {
std::string op = "floor_temporal";
const char* floor_1_nanosecond =
@@ -2420,6 +2784,133 @@ TEST_F(ScalarTemporalTest, TestFloorTemporal) {
CheckScalarUnary(op, unit, times, unit, floor_15_years, &round_to_15_years);
}
+TEST_F(ScalarTemporalTest, TestFloorTemporalMultipleSinceGreaterUnit) {
+ std::string op = "floor_temporal";
+ RoundTemporalOptions round_to_15_nanoseconds =
+ RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, true);
+ RoundTemporalOptions round_to_15_microseconds =
+ RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, true);
+ RoundTemporalOptions round_to_15_milliseconds =
+ RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, true);
+ RoundTemporalOptions round_to_13_seconds =
+ RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, true);
+ RoundTemporalOptions round_to_13_minutes =
+ RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, true);
+ RoundTemporalOptions round_to_15_hours =
+ RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, true);
+ RoundTemporalOptions round_to_15_days =
+ RoundTemporalOptions(15, CalendarUnit::DAY, true, true, true);
+ RoundTemporalOptions round_to_15_weeks =
+ RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, true);
+ RoundTemporalOptions round_to_15_weeks_sunday =
+ RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, true);
+ RoundTemporalOptions round_to_15_months =
+ RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, true);
+ RoundTemporalOptions round_to_15_quarters =
+ RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, true);
+ RoundTemporalOptions round_to_15_years =
+ RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, true);
+
+ // Data for tests below was generated via lubridate with the exception
+ // of week data because lubridate currently does not support rounding to
+ // multiple of week.
+ const char* floor_15_nanosecond =
+ R"(["1970-01-01 00:00:59.123456780", "2000-02-29 23:23:23.999999990",
+ "1899-01-01 00:59:20.001001000", "2033-05-18 03:33:20.000000000",
+ "2020-01-01 01:05:05.001000000", "2019-12-31 02:10:10.002000000",
+ "2019-12-30 03:15:15.003000000", "2009-12-31 04:20:20.004132000",
+ "2010-01-01 05:25:25.005321000", "2010-01-03 06:30:30.006163000",
+ "2010-01-04 07:35:35.000000000", "2006-01-01 08:40:40.000000000",
+ "2005-12-31 09:45:45.000000000", "2008-12-28 00:00:00.000000000",
+ "2008-12-29 00:00:00.000000000", "2012-01-01 01:02:03.000000000", null])";
+ const char* floor_15_microsecond =
+ R"(["1970-01-01 00:00:59.123450", "2000-02-29 23:23:23.999990",
+ "1899-01-01 00:59:20.001000", "2033-05-18 03:33:20.000000",
+ "2020-01-01 01:05:05.001000", "2019-12-31 02:10:10.002000",
+ "2019-12-30 03:15:15.003000", "2009-12-31 04:20:20.004120",
+ "2010-01-01 05:25:25.005315", "2010-01-03 06:30:30.006150",
+ "2010-01-04 07:35:35.000000", "2006-01-01 08:40:40.000000",
+ "2005-12-31 09:45:45.000000", "2008-12-28 00:00:00.000000",
+ "2008-12-29 00:00:00.000000", "2012-01-01 01:02:03.000000", null])";
+ const char* floor_15_millisecond =
+ R"(["1970-01-01 00:00:59.120", "2000-02-29 23:23:23.990",
+ "1899-01-01 00:59:20.000", "2033-05-18 03:33:20.000",
+ "2020-01-01 01:05:05.000", "2019-12-31 02:10:10.000",
+ "2019-12-30 03:15:15.000", "2009-12-31 04:20:20.000",
+ "2010-01-01 05:25:25.000", "2010-01-03 06:30:30.000",
+ "2010-01-04 07:35:35.000", "2006-01-01 08:40:40.000",
+ "2005-12-31 09:45:45.000", "2008-12-28 00:00:00.000",
+ "2008-12-29 00:00:00.000", "2012-01-01 01:02:03.000", null])";
+ const char* floor_13_second =
+ R"(["1970-01-01 00:00:52", "2000-02-29 23:23:13", "1899-01-01 00:59:13",
+ "2033-05-18 03:33:13", "2020-01-01 01:05:00", "2019-12-31 02:10:00",
+ "2019-12-30 03:15:13", "2009-12-31 04:20:13", "2010-01-01 05:25:13",
+ "2010-01-03 06:30:26", "2010-01-04 07:35:26", "2006-01-01 08:40:39",
+ "2005-12-31 09:45:39", "2008-12-28 00:00:00", "2008-12-29 00:00:00",
+ "2012-01-01 01:02:00", null])";
+ const char* floor_13_minute =
+ R"(["1970-01-01 00:00:00", "2000-02-29 23:13:00", "1899-01-01 00:52:00",
+ "2033-05-18 03:26:00", "2020-01-01 01:00:00", "2019-12-31 02:00:00",
+ "2019-12-30 03:13:00", "2009-12-31 04:13:00", "2010-01-01 05:13:00",
+ "2010-01-03 06:26:00", "2010-01-04 07:26:00", "2006-01-01 08:39:00",
+ "2005-12-31 09:39:00", "2008-12-28 00:00:00", "2008-12-29 00:00:00",
+ "2012-01-01 01:00:00", null])";
+ const char* floor_15_hour =
+ R"(["1970-01-01 00:00:00", "2000-02-29 15:00:00", "1899-01-01 00:00:00",
+ "2033-05-18 00:00:00", "2020-01-01 00:00:00", "2019-12-31 00:00:00",
+ "2019-12-30 00:00:00", "2009-12-31 00:00:00", "2010-01-01 00:00:00",
+ "2010-01-03 00:00:00", "2010-01-04 00:00:00", "2006-01-01 00:00:00",
+ "2005-12-31 00:00:00", "2008-12-28 00:00:00", "2008-12-29 00:00:00",
+ "2012-01-01 00:00:00", null])";
+ const char* floor_15_day =
+ R"(["1970-01-01", "2000-02-16", "1899-01-01", "2033-05-16",
+ "2020-01-01", "2019-12-31", "2019-12-16", "2009-12-31",
+ "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01",
+ "2005-12-31", "2008-12-16", "2008-12-16", "2012-01-01", null])";
+ const char* floor_15_weeks =
+ R"(["1969-12-29", "2000-01-03", "1899-01-02", "2033-04-18",
+ "2019-12-30", "2019-12-30", "2019-12-30", "2010-01-04",
+ "2010-01-04", "2010-01-04", "2010-01-04", "2006-01-02",
+ "2006-01-02", "2008-11-10", "2008-12-29", "2012-01-02", null])";
+ const char* floor_15_weeks_sunday =
+ R"(["1970-01-04", "2000-01-02", "1899-01-01", "2033-04-17",
+ "2019-12-29", "2019-12-29", "2019-12-29", "2010-01-03",
+ "2010-01-03", "2010-01-03", "2010-01-03", "2006-01-01",
+ "2006-01-01", "2009-01-04", "2009-01-04", "2012-01-01", null])";
+ const char* floor_15_months =
+ R"(["1970-01-01", "2000-01-01", "1899-01-01", "2033-01-01",
+ "2020-01-01", "2019-01-01", "2019-01-01", "2009-01-01",
+ "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01",
+ "2005-01-01", "2008-01-01", "2008-01-01", "2012-01-01", null])";
+ const char* floor_15_quarters =
+ R"(["1970-01-01", "2000-01-01", "1899-01-01", "2033-01-01",
+ "2020-01-01", "2019-01-01", "2019-01-01", "2009-01-01",
+ "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01",
+ "2005-01-01", "2008-01-01", "2008-01-01", "2012-01-01", null])";
+ const char* floor_15_years =
+ R"(["1965-01-01", "1995-01-01", "1890-01-01", "2025-01-01",
+ "2010-01-01", "2010-01-01", "2010-01-01", "1995-01-01",
+ "2010-01-01", "2010-01-01", "2010-01-01", "1995-01-01",
+ "1995-01-01", "1995-01-01", "1995-01-01", "2010-01-01", null])";
+
+ auto unit = timestamp(TimeUnit::NANO, "UTC");
+ CheckScalarUnary(op, unit, times, unit, floor_15_nanosecond, &round_to_15_nanoseconds);
+ CheckScalarUnary(op, unit, times, unit, floor_15_microsecond,
+ &round_to_15_microseconds);
+ CheckScalarUnary(op, unit, times, unit, floor_15_millisecond,
+ &round_to_15_milliseconds);
+ CheckScalarUnary(op, unit, times, unit, floor_13_second, &round_to_13_seconds);
+ CheckScalarUnary(op, unit, times, unit, floor_13_minute, &round_to_13_minutes);
+ CheckScalarUnary(op, unit, times, unit, floor_15_hour, &round_to_15_hours);
+ CheckScalarUnary(op, unit, times, unit, floor_15_day, &round_to_15_days);
+ CheckScalarUnary(op, unit, times, unit, floor_15_weeks, &round_to_15_weeks);
+ CheckScalarUnary(op, unit, times, unit, floor_15_weeks_sunday,
+ &round_to_15_weeks_sunday);
+ CheckScalarUnary(op, unit, times, unit, floor_15_months, &round_to_15_months);
+ CheckScalarUnary(op, unit, times, unit, floor_15_quarters, &round_to_15_quarters);
+ CheckScalarUnary(op, unit, times, unit, floor_15_years, &round_to_15_years);
+}
+
TEST_F(ScalarTemporalTest, TestRoundTemporal) {
std::string op = "round_temporal";
const char* round_1_nanoseconds =
@@ -2632,6 +3123,132 @@ TEST_F(ScalarTemporalTest, TestCeilFloorRoundTemporalBrussels) {
CheckScalarUnary("round_temporal", unit, times, unit, round_2_hours, &round_to_2_hours);
}
+TEST_F(ScalarTemporalTest, TestRoundTemporalMultipleSinceGreaterUnit) {
+ std::string op = "round_temporal";
+ RoundTemporalOptions round_to_15_nanoseconds =
+ RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, true);
+ RoundTemporalOptions round_to_15_microseconds =
+ RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, true);
+ RoundTemporalOptions round_to_15_milliseconds =
+ RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, true);
+ RoundTemporalOptions round_to_13_seconds =
+ RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, true);
+ RoundTemporalOptions round_to_13_minutes =
+ RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, true);
+ RoundTemporalOptions round_to_15_hours =
+ RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, true);
+ RoundTemporalOptions round_to_15_days =
+ RoundTemporalOptions(15, CalendarUnit::DAY, true, true, true);
+ RoundTemporalOptions round_to_15_weeks =
+ RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, true);
+ RoundTemporalOptions round_to_15_weeks_sunday =
+ RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, true);
+ RoundTemporalOptions round_to_5_months =
+ RoundTemporalOptions(5, CalendarUnit::MONTH, true, true, true);
+ RoundTemporalOptions round_to_15_quarters =
+ RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, true);
+ RoundTemporalOptions round_to_15_years =
+ RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, true);
+
+ // Data for tests below was generated via lubridate with the exception
+ // of week data because lubridate currently does not support rounding to
+ // a multiple of weeks.
+ const char* round_15_nanosecond =
+ R"(["1970-01-01 00:00:59.123456795", "2000-02-29 23:23:24.000000005",
+ "1899-01-01 00:59:20.001001000", "2033-05-18 03:33:20.000000000",
+ "2020-01-01 01:05:05.001000000", "2019-12-31 02:10:10.002000000",
+ "2019-12-30 03:15:15.003000000", "2009-12-31 04:20:20.004132000",
+ "2010-01-01 05:25:25.005321000", "2010-01-03 06:30:30.006163000",
+ "2010-01-04 07:35:35.000000000", "2006-01-01 08:40:40.000000000",
+ "2005-12-31 09:45:45.000000000", "2008-12-28 00:00:00.000000000",
+ "2008-12-29 00:00:00.000000000", "2012-01-01 01:02:03.000000000", null])";
+ const char* round_15_microsecond =
+ R"(["1970-01-01 00:00:59.123450", "2000-02-29 23:23:24.000005",
+ "1899-01-01 00:59:20.001000", "2033-05-18 03:33:20.000000",
+ "2020-01-01 01:05:05.001000", "2019-12-31 02:10:10.002000",
+ "2019-12-30 03:15:15.003000", "2009-12-31 04:20:20.004135",
+ "2010-01-01 05:25:25.005315", "2010-01-03 06:30:30.006165",
+ "2010-01-04 07:35:35.000000", "2006-01-01 08:40:40.000000",
+ "2005-12-31 09:45:45.000000", "2008-12-28 00:00:00.000000",
+ "2008-12-29 00:00:00.000000", "2012-01-01 01:02:03.000000", null])";
+ const char* round_15_millisecond =
+ R"(["1970-01-01 00:00:59.120", "2000-02-29 23:23:24.005",
+ "1899-01-01 00:59:20.000", "2033-05-18 03:33:20.000",
+ "2020-01-01 01:05:05.000", "2019-12-31 02:10:10.000",
+ "2019-12-30 03:15:15.000", "2009-12-31 04:20:20.000",
+ "2010-01-01 05:25:25.000", "2010-01-03 06:30:30.000",
+ "2010-01-04 07:35:35.000", "2006-01-01 08:40:40.000",
+ "2005-12-31 09:45:45.000", "2008-12-28 00:00:00.000",
+ "2008-12-29 00:00:00.000", "2012-01-01 01:02:03.000", null])";
+ const char* round_13_second =
+ R"(["1970-01-01 00:01:05", "2000-02-29 23:23:26", "1899-01-01 00:59:26",
+ "2033-05-18 03:33:26", "2020-01-01 01:05:00", "2019-12-31 02:10:13",
+ "2019-12-30 03:15:13", "2009-12-31 04:20:26", "2010-01-01 05:25:26",
+ "2010-01-03 06:30:26", "2010-01-04 07:35:39", "2006-01-01 08:40:39",
+ "2005-12-31 09:45:39", "2008-12-28 00:00:00", "2008-12-29 00:00:00",
+ "2012-01-01 01:02:00", null])";
+ const char* round_13_minute =
+ R"(["1970-01-01 00:00:00", "2000-02-29 23:26:00", "1899-01-01 01:05:00",
+ "2033-05-18 03:39:00", "2020-01-01 01:00:00", "2019-12-31 02:13:00",
+ "2019-12-30 03:13:00", "2009-12-31 04:26:00", "2010-01-01 05:26:00",
+ "2010-01-03 06:26:00", "2010-01-04 07:39:00", "2006-01-01 08:39:00",
+ "2005-12-31 09:52:00", "2008-12-28 00:00:00", "2008-12-29 00:00:00",
+ "2012-01-01 01:00:00", null])";
+ const char* round_15_hour =
+ R"(["1970-01-01 00:00:00", "2000-03-01 06:00:00", "1899-01-01 00:00:00",
+ "2033-05-18 00:00:00", "2020-01-01 00:00:00", "2019-12-31 00:00:00",
+ "2019-12-30 00:00:00", "2009-12-31 00:00:00", "2010-01-01 00:00:00",
+ "2010-01-03 00:00:00", "2010-01-04 15:00:00", "2006-01-01 15:00:00",
+ "2005-12-31 15:00:00", "2008-12-28 00:00:00", "2008-12-29 00:00:00",
+ "2012-01-01 00:00:00", null])";
+ const char* round_15_day =
+ R"(["1970-01-01", "2000-03-02", "1899-01-01", "2033-05-16",
+ "2020-01-01", "2019-12-31", "2019-12-31", "2009-12-31",
+ "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01",
+ "2005-12-31", "2008-12-31", "2008-12-31", "2012-01-01", null])";
+ const char* round_15_weeks =
+ R"(["1969-12-29", "2000-04-17", "1899-01-02", "2033-04-18",
+ "2019-12-30", "2019-12-30", "2019-12-30", "2010-01-04",
+ "2010-01-04", "2010-01-04", "2010-01-04", "2006-01-02",
+ "2006-01-02", "2008-11-10", "2008-12-29", "2012-01-02", null])";
+ const char* round_15_weeks_sunday =
+ R"(["1970-01-04", "2000-04-16", "1899-01-01", "2033-04-17",
+ "2019-12-29", "2019-12-29", "2019-12-29", "2010-01-03",
+ "2010-01-03", "2010-01-03", "2010-01-03", "2006-01-01",
+ "2006-01-01", "2009-01-04", "2009-01-04", "2012-01-01", null])";
+ const char* round_5_months =
+ R"(["1970-01-01", "2000-01-01", "1899-01-01", "2033-06-01",
+ "2020-01-01", "2019-11-01", "2019-11-01", "2009-11-01",
+ "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01",
+ "2005-11-01", "2008-11-01", "2008-11-01", "2012-01-01", null])";
+ const char* round_15_quarters =
+ R"(["1970-01-01", "2000-01-01", "1899-01-01", "2033-01-01",
+ "2020-01-01", "2019-01-01", "2019-01-01", "2009-01-01",
+ "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01",
+ "2005-01-01", "2008-01-01", "2008-01-01", "2012-01-01", null])";
+ const char* round_15_years =
+ R"(["1965-01-01", "1995-01-01", "1905-01-01", "2040-01-01",
+ "2025-01-01", "2025-01-01", "2025-01-01", "2010-01-01",
+ "2010-01-01", "2010-01-01", "2010-01-01", "2010-01-01",
+ "2010-01-01", "2010-01-01", "2010-01-01", "2010-01-01", null])";
+ auto unit = timestamp(TimeUnit::NANO, "UTC");
+ CheckScalarUnary(op, unit, times, unit, round_15_nanosecond, &round_to_15_nanoseconds);
+ CheckScalarUnary(op, unit, times, unit, round_15_microsecond,
+ &round_to_15_microseconds);
+ CheckScalarUnary(op, unit, times, unit, round_15_millisecond,
+ &round_to_15_milliseconds);
+ CheckScalarUnary(op, unit, times, unit, round_13_second, &round_to_13_seconds);
+ CheckScalarUnary(op, unit, times, unit, round_13_minute, &round_to_13_minutes);
+ CheckScalarUnary(op, unit, times, unit, round_15_hour, &round_to_15_hours);
+ CheckScalarUnary(op, unit, times, unit, round_15_day, &round_to_15_days);
+ CheckScalarUnary(op, unit, times, unit, round_15_weeks, &round_to_15_weeks);
+ CheckScalarUnary(op, unit, times, unit, round_15_weeks_sunday,
+ &round_to_15_weeks_sunday);
+ CheckScalarUnary(op, unit, times, unit, round_5_months, &round_to_5_months);
+ CheckScalarUnary(op, unit, times, unit, round_15_quarters, &round_to_15_quarters);
+ CheckScalarUnary(op, unit, times, unit, round_15_years, &round_to_15_years);
+}
+
TEST_F(ScalarTemporalTest, TestCeilFloorRoundTemporalKolkata) {
// Kolkata timezone was defined as UTC+5:21:10 from 1871 to 1906 when it changed to
// IST (UTC+05:30) without DST. This test is to check rounding is done in historical
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
index 7484de2a00..6275de9481 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
@@ -689,12 +689,36 @@ struct IsDaylightSavings {
// Round temporal values to given frequency
template <typename Duration, typename Localizer>
-year_month_day GetFlooredYmd(int64_t arg, int multiple, Localizer localizer_) {
+year_month_day GetFlooredYmd(int64_t arg, const int multiple,
+ const RoundTemporalOptions& options, Localizer localizer_) {
year_month_day ymd{floor<days>(localizer_.template ConvertTimePoint<Duration>(arg))};
if (multiple == 1) {
+ // Round to a multiple of months since epoch start (1970-01-01 00:00:00).
return year_month_day(ymd.year() / ymd.month() / 1);
+ } else if (options.calendar_based_origin) {
+ // Round to a multiple of months since the beginning of the year.
+ //
+ // Note: compute::CalendarUnit::YEAR is the greatest unit so there is no logical time
+ // point to use as origin. compute::CalendarUnit::DAY is covered by FloorTimePoint.
+ // Therefore compute::CalendarUnit::YEAR and compute::CalendarUnit::DAY are not
+ // covered here.
+ switch (options.unit) {
+ case compute::CalendarUnit::MONTH: {
+ const auto m =
+ static_cast<uint32_t>(ymd.month()) / options.multiple * options.multiple;
+ return year_month_day(ymd.year() / 1 / 1) + months{m};
+ }
+ case compute::CalendarUnit::QUARTER: {
+ const auto m = static_cast<uint32_t>(ymd.month()) / (options.multiple * 3) *
+ (options.multiple * 3);
+ return year_month_day(ymd.year() / 1 / 1) + months{m};
+ }
+ default:
+ return ymd;
+ }
} else {
+ // Round to month * options.multiple since epoch start (1970-01-01 00:00:00).
int32_t total_months_origin = 1970 * 12;
int32_t total_months = static_cast<int32_t>(ymd.year()) * 12 +
static_cast<int32_t>(static_cast<uint32_t>(ymd.month())) - 1 -
@@ -705,21 +729,71 @@ year_month_day GetFlooredYmd(int64_t arg, int multiple, Localizer localizer_) {
} else {
total_months = (total_months - multiple + 1) / multiple * multiple;
}
- return year_month_day(year{1970} / jan / 0) + months{total_months};
+ return year_month_day(year{1970} / jan / 1) + months{total_months};
}
}
template <typename Duration, typename Unit, typename Localizer>
-const Duration FloorTimePoint(const int64_t arg, const int64_t multiple,
+const Duration FloorTimePoint(const int64_t arg, const RoundTemporalOptions& options,
Localizer localizer_, Status* st) {
const auto t = localizer_.template ConvertTimePoint<Duration>(arg);
- const Unit d = floor<Unit>(t).time_since_epoch();
- if (multiple == 1) {
+ if (options.multiple == 1) {
+ // Round to a multiple of unit since epoch start (1970-01-01 00:00:00).
+ const Unit d = floor<Unit>(t).time_since_epoch();
return localizer_.template ConvertLocalToSys<Duration>(duration_cast<Duration>(d),
st);
+ } else if (options.calendar_based_origin) {
+ // Round to a multiple of units since the last greater unit.
+ // For example: round to multiple of days since the beginning of the month or
+ // to hours since the beginning of the day.
+ const Unit unit = Unit{options.multiple};
+ Duration origin;
+
+ switch (options.unit) {
+ case compute::CalendarUnit::DAY:
+ origin = duration_cast<Duration>(
+ localizer_
+ .ConvertDays(year_month_day(floor<days>(t)).year() /
+ year_month_day(floor<days>(t)).month() / 1)
+ .time_since_epoch());
+ break;
+ case compute::CalendarUnit::HOUR:
+ origin = duration_cast<Duration>(
+ localizer_.ConvertDays(year_month_day(floor<days>(t))).time_since_epoch());
+ break;
+ case compute::CalendarUnit::MINUTE:
+ origin = duration_cast<Duration>(floor<std::chrono::hours>(t).time_since_epoch());
+ break;
+ case compute::CalendarUnit::SECOND:
+ origin =
+ duration_cast<Duration>(floor<std::chrono::minutes>(t).time_since_epoch());
+ break;
+ case compute::CalendarUnit::MILLISECOND:
+ origin =
+ duration_cast<Duration>(floor<std::chrono::seconds>(t).time_since_epoch());
+ break;
+ case compute::CalendarUnit::MICROSECOND:
+ origin = duration_cast<Duration>(
+ floor<std::chrono::milliseconds>(t).time_since_epoch());
+ break;
+ case compute::CalendarUnit::NANOSECOND:
+ origin = duration_cast<Duration>(
+ floor<std::chrono::microseconds>(t).time_since_epoch());
+ break;
+ default: {
+ *st = Status::Invalid("Cannot floor to ", &options.unit);
+ return Duration{0};
+ }
+ }
+ const Duration m =
+ duration_cast<Duration>(((t - origin).time_since_epoch() / unit * unit + origin));
+ return localizer_.template ConvertLocalToSys<Duration>(m, st);
} else {
- const Unit unit = Unit{multiple};
+ // Round to a multiple of units * options.multiple since epoch start
+ // (1970-01-01 00:00:00).
+ const Unit d = floor<Unit>(t).time_since_epoch();
+ const Unit unit = Unit{options.multiple};
const Unit m =
(d.count() >= 0) ? d / unit * unit : (d - unit + Unit{1}) / unit * unit;
return localizer_.template ConvertLocalToSys<Duration>(duration_cast<Duration>(m),
@@ -728,18 +802,35 @@ const Duration FloorTimePoint(const int64_t arg, const int64_t multiple,
}
template <typename Duration, typename Localizer>
-const Duration FloorWeekTimePoint(const int64_t arg, const int64_t multiple,
+const Duration FloorWeekTimePoint(const int64_t arg, const RoundTemporalOptions& options,
Localizer localizer_, const Duration weekday_offset,
Status* st) {
const auto t = localizer_.template ConvertTimePoint<Duration>(arg) + weekday_offset;
const weeks d = floor<weeks>(t).time_since_epoch();
- if (multiple == 1) {
+ if (options.multiple == 1) {
+ // Round to a multiple of weeks since epoch start (1970-01-01 00:00:00).
return localizer_.template ConvertLocalToSys<Duration>(duration_cast<Duration>(d),
st) -
weekday_offset;
+ } else if (options.calendar_based_origin) {
+ // Round to a multiple of weeks counted from an origin in late December of the prior year.
+ weekday wd_;
+ if (options.week_starts_monday) {
+ wd_ = thu;
+ } else {
+ wd_ = wed;
+ }
+ const auto y = year_month_day{floor<days>(t)}.year();
+ const auto start =
+ localizer_.ConvertDays((y - years{1}) / dec / wd_[last]) + (mon - thu);
+ const weeks unit = weeks{options.multiple};
+ const auto m = (t - start) / unit * unit + start;
+ return localizer_.template ConvertLocalToSys<Duration>(m.time_since_epoch(), st);
} else {
- const weeks unit = weeks{multiple};
+ // Round to a multiple of weeks * options.multiple since epoch start
+ // (1970-01-01 00:00:00).
+ const weeks unit = weeks{options.multiple};
const weeks m =
(d.count() >= 0) ? d / unit * unit : (d - unit + weeks{1}) / unit * unit;
return localizer_.template ConvertLocalToSys<Duration>(duration_cast<Duration>(m),
@@ -749,55 +840,58 @@ const Duration FloorWeekTimePoint(const int64_t arg, const int64_t multiple,
}
template <typename Duration, typename Unit, typename Localizer>
-Duration CeilTimePoint(const int64_t arg, const int64_t multiple, Localizer localizer_,
- Status* st) {
+Duration CeilTimePoint(const int64_t arg, const RoundTemporalOptions& options,
+ Localizer localizer_, Status* st) {
const Duration f =
- FloorTimePoint<Duration, Unit, Localizer>(arg, multiple, localizer_, st);
+ FloorTimePoint<Duration, Unit, Localizer>(arg, options, localizer_, st);
const auto cl =
localizer_.template ConvertTimePoint<Duration>(f.count()).time_since_epoch();
const Duration cs =
localizer_.template ConvertLocalToSys<Duration>(duration_cast<Duration>(cl), st);
- if (cs >= Duration{arg}) {
- return cs;
+
+ if (options.ceil_is_strictly_greater || cs < Duration{arg}) {
+ return localizer_.template ConvertLocalToSys<Duration>(
+ duration_cast<Duration>(cl + duration_cast<Duration>(Unit{options.multiple})),
+ st);
}
- return localizer_.template ConvertLocalToSys<Duration>(
- duration_cast<Duration>(cl + duration_cast<Duration>(Unit{multiple})), st);
+ return cs;
}
template <typename Duration, typename Localizer>
-Duration CeilWeekTimePoint(const int64_t arg, const int64_t multiple,
+Duration CeilWeekTimePoint(const int64_t arg, const RoundTemporalOptions& options,
Localizer localizer_, const Duration weekday_offset,
Status* st) {
- const Duration f = FloorWeekTimePoint<Duration, Localizer>(arg, multiple, localizer_,
+ const Duration f = FloorWeekTimePoint<Duration, Localizer>(arg, options, localizer_,
weekday_offset, st);
const auto cl =
localizer_.template ConvertTimePoint<Duration>(f.count()).time_since_epoch();
const Duration cs =
localizer_.template ConvertLocalToSys<Duration>(duration_cast<Duration>(cl), st);
- if (cs >= Duration{arg}) {
- return cs;
+ if (options.ceil_is_strictly_greater || cs < Duration{arg}) {
+ return localizer_.template ConvertLocalToSys<Duration>(
+ duration_cast<Duration>(cl + duration_cast<Duration>(weeks{options.multiple})),
+ st);
}
- return localizer_.template ConvertLocalToSys<Duration>(
- duration_cast<Duration>(cl + duration_cast<Duration>(weeks{multiple})), st);
+ return cs;
}
template <typename Duration, typename Unit, typename Localizer>
-Duration RoundTimePoint(const int64_t arg, const int64_t multiple, Localizer localizer_,
- Status* st) {
+Duration RoundTimePoint(const int64_t arg, const RoundTemporalOptions& options,
+ Localizer localizer_, Status* st) {
const Duration f =
- FloorTimePoint<Duration, Unit, Localizer>(arg, multiple, localizer_, st);
+ FloorTimePoint<Duration, Unit, Localizer>(arg, options, localizer_, st);
const Duration c =
- CeilTimePoint<Duration, Unit, Localizer>(arg, multiple, localizer_, st);
+ CeilTimePoint<Duration, Unit, Localizer>(arg, options, localizer_, st);
return (Duration{arg} - f >= c - Duration{arg}) ? c : f;
}
template <typename Duration, typename Localizer>
-Duration RoundWeekTimePoint(const int64_t arg, const int64_t multiple,
+Duration RoundWeekTimePoint(const int64_t arg, const RoundTemporalOptions& options,
Localizer localizer_, const Duration weekday_offset,
Status* st) {
- const Duration f = FloorWeekTimePoint<Duration, Localizer>(arg, multiple, localizer_,
+ const Duration f = FloorWeekTimePoint<Duration, Localizer>(arg, options, localizer_,
weekday_offset, st);
- const Duration c = CeilWeekTimePoint<Duration, Localizer>(arg, multiple, localizer_,
+ const Duration c = CeilWeekTimePoint<Duration, Localizer>(arg, options, localizer_,
weekday_offset, st);
return (Duration{arg} - f >= c - Duration{arg}) ? c : f;
}
@@ -812,52 +906,50 @@ struct CeilTemporal {
Duration t;
switch (options.unit) {
case compute::CalendarUnit::NANOSECOND:
- t = CeilTimePoint<Duration, std::chrono::nanoseconds, Localizer>(
- arg, options.multiple, localizer_, st);
+ t = CeilTimePoint<Duration, std::chrono::nanoseconds, Localizer>(arg, options,
+ localizer_, st);
break;
case compute::CalendarUnit::MICROSECOND:
- t = CeilTimePoint<Duration, std::chrono::microseconds, Localizer>(
- arg, options.multiple, localizer_, st);
+ t = CeilTimePoint<Duration, std::chrono::microseconds, Localizer>(arg, options,
+ localizer_, st);
break;
case compute::CalendarUnit::MILLISECOND:
- t = CeilTimePoint<Duration, std::chrono::milliseconds, Localizer>(
- arg, options.multiple, localizer_, st);
+ t = CeilTimePoint<Duration, std::chrono::milliseconds, Localizer>(arg, options,
+ localizer_, st);
break;
case compute::CalendarUnit::SECOND:
- t = CeilTimePoint<Duration, std::chrono::seconds, Localizer>(
- arg, options.multiple, localizer_, st);
+ t = CeilTimePoint<Duration, std::chrono::seconds, Localizer>(arg, options,
+ localizer_, st);
break;
case compute::CalendarUnit::MINUTE:
- t = CeilTimePoint<Duration, minutes, Localizer>(arg, options.multiple, localizer_,
- st);
+ t = CeilTimePoint<Duration, minutes, Localizer>(arg, options, localizer_, st);
break;
case compute::CalendarUnit::HOUR:
- t = CeilTimePoint<Duration, std::chrono::hours, Localizer>(arg, options.multiple,
+ t = CeilTimePoint<Duration, std::chrono::hours, Localizer>(arg, options,
localizer_, st);
break;
case compute::CalendarUnit::DAY:
- t = CeilTimePoint<Duration, days, Localizer>(arg, options.multiple, localizer_,
- st);
+ t = CeilTimePoint<Duration, days, Localizer>(arg, options, localizer_, st);
break;
case compute::CalendarUnit::WEEK:
if (options.week_starts_monday) {
- t = CeilWeekTimePoint<Duration, Localizer>(arg, options.multiple, localizer_,
- days{3}, st);
+ t = CeilWeekTimePoint<Duration, Localizer>(arg, options, localizer_, days{3},
+ st);
} else {
- t = CeilWeekTimePoint<Duration, Localizer>(arg, options.multiple, localizer_,
- days{4}, st);
+ t = CeilWeekTimePoint<Duration, Localizer>(arg, options, localizer_, days{4},
+ st);
}
break;
case compute::CalendarUnit::MONTH: {
- year_month_day ymd =
- GetFlooredYmd<Duration, Localizer>(arg, options.multiple, localizer_);
+ year_month_day ymd = GetFlooredYmd<Duration, Localizer>(arg, options.multiple,
+ options, localizer_);
ymd += months{options.multiple};
t = localizer_.ConvertDays(ymd.year() / ymd.month() / 1).time_since_epoch();
break;
}
case compute::CalendarUnit::QUARTER: {
- year_month_day ymd =
- GetFlooredYmd<Duration, Localizer>(arg, 3 * options.multiple, localizer_);
+ year_month_day ymd = GetFlooredYmd<Duration, Localizer>(arg, 3 * options.multiple,
+ options, localizer_);
ymd += months{3 * options.multiple};
t = localizer_.ConvertDays(ymd.year() / ymd.month() / 1).time_since_epoch();
break;
@@ -890,51 +982,49 @@ struct FloorTemporal {
Duration t;
switch (options.unit) {
case compute::CalendarUnit::NANOSECOND:
- t = FloorTimePoint<Duration, std::chrono::nanoseconds, Localizer>(
- arg, options.multiple, localizer_, st);
+ t = FloorTimePoint<Duration, std::chrono::nanoseconds, Localizer>(arg, options,
+ localizer_, st);
break;
case compute::CalendarUnit::MICROSECOND:
t = FloorTimePoint<Duration, std::chrono::microseconds, Localizer>(
- arg, options.multiple, localizer_, st);
+ arg, options, localizer_, st);
break;
case compute::CalendarUnit::MILLISECOND:
t = FloorTimePoint<Duration, std::chrono::milliseconds, Localizer>(
- arg, options.multiple, localizer_, st);
+ arg, options, localizer_, st);
break;
case compute::CalendarUnit::SECOND:
- t = FloorTimePoint<Duration, std::chrono::seconds, Localizer>(
- arg, options.multiple, localizer_, st);
+ t = FloorTimePoint<Duration, std::chrono::seconds, Localizer>(arg, options,
+ localizer_, st);
break;
case compute::CalendarUnit::MINUTE:
- t = FloorTimePoint<Duration, minutes, Localizer>(arg, options.multiple,
- localizer_, st);
+ t = FloorTimePoint<Duration, minutes, Localizer>(arg, options, localizer_, st);
break;
case compute::CalendarUnit::HOUR:
- t = FloorTimePoint<Duration, std::chrono::hours, Localizer>(arg, options.multiple,
+ t = FloorTimePoint<Duration, std::chrono::hours, Localizer>(arg, options,
localizer_, st);
break;
case compute::CalendarUnit::DAY:
- t = FloorTimePoint<Duration, days, Localizer>(arg, options.multiple, localizer_,
- st);
+ t = FloorTimePoint<Duration, days, Localizer>(arg, options, localizer_, st);
break;
case compute::CalendarUnit::WEEK:
if (options.week_starts_monday) {
- t = FloorWeekTimePoint<Duration, Localizer>(arg, options.multiple, localizer_,
- days{3}, st);
+ t = FloorWeekTimePoint<Duration, Localizer>(arg, options, localizer_, days{3},
+ st);
} else {
- t = FloorWeekTimePoint<Duration, Localizer>(arg, options.multiple, localizer_,
- days{4}, st);
+ t = FloorWeekTimePoint<Duration, Localizer>(arg, options, localizer_, days{4},
+ st);
}
break;
case compute::CalendarUnit::MONTH: {
- year_month_day ymd =
- GetFlooredYmd<Duration, Localizer>(arg, options.multiple, localizer_);
+ year_month_day ymd = GetFlooredYmd<Duration, Localizer>(arg, options.multiple,
+ options, localizer_);
t = localizer_.ConvertDays(ymd.year() / ymd.month() / 1).time_since_epoch();
break;
}
case compute::CalendarUnit::QUARTER: {
- year_month_day ymd =
- GetFlooredYmd<Duration, Localizer>(arg, 3 * options.multiple, localizer_);
+ year_month_day ymd = GetFlooredYmd<Duration, Localizer>(arg, 3 * options.multiple,
+ options, localizer_);
t = localizer_.ConvertDays(ymd.year() / ymd.month() / 1).time_since_epoch();
break;
}
@@ -965,46 +1055,44 @@ struct RoundTemporal {
Duration t;
switch (options.unit) {
case compute::CalendarUnit::NANOSECOND:
- t = RoundTimePoint<Duration, std::chrono::nanoseconds, Localizer>(
- arg, options.multiple, localizer_, st);
+ t = RoundTimePoint<Duration, std::chrono::nanoseconds, Localizer>(arg, options,
+ localizer_, st);
break;
case compute::CalendarUnit::MICROSECOND:
t = RoundTimePoint<Duration, std::chrono::microseconds, Localizer>(
- arg, options.multiple, localizer_, st);
+ arg, options, localizer_, st);
break;
case compute::CalendarUnit::MILLISECOND:
t = RoundTimePoint<Duration, std::chrono::milliseconds, Localizer>(
- arg, options.multiple, localizer_, st);
+ arg, options, localizer_, st);
break;
case compute::CalendarUnit::SECOND:
- t = RoundTimePoint<Duration, std::chrono::seconds, Localizer>(
- arg, options.multiple, localizer_, st);
+ t = RoundTimePoint<Duration, std::chrono::seconds, Localizer>(arg, options,
+ localizer_, st);
break;
case compute::CalendarUnit::MINUTE:
- t = RoundTimePoint<Duration, minutes, Localizer>(arg, options.multiple,
- localizer_, st);
+ t = RoundTimePoint<Duration, minutes, Localizer>(arg, options, localizer_, st);
break;
case compute::CalendarUnit::HOUR:
- t = RoundTimePoint<Duration, std::chrono::hours, Localizer>(arg, options.multiple,
+ t = RoundTimePoint<Duration, std::chrono::hours, Localizer>(arg, options,
localizer_, st);
break;
case compute::CalendarUnit::DAY:
- t = RoundTimePoint<Duration, days, Localizer>(arg, options.multiple, localizer_,
- st);
+ t = RoundTimePoint<Duration, days, Localizer>(arg, options, localizer_, st);
break;
case compute::CalendarUnit::WEEK:
if (options.week_starts_monday) {
- t = RoundWeekTimePoint<Duration, Localizer>(arg, options.multiple, localizer_,
- days{3}, st);
+ t = RoundWeekTimePoint<Duration, Localizer>(arg, options, localizer_, days{3},
+ st);
} else {
- t = RoundWeekTimePoint<Duration, Localizer>(arg, options.multiple, localizer_,
- days{4}, st);
+ t = RoundWeekTimePoint<Duration, Localizer>(arg, options, localizer_, days{4},
+ st);
}
break;
case compute::CalendarUnit::MONTH: {
auto t0 = localizer_.template ConvertTimePoint<Duration>(arg);
- year_month_day ymd =
- GetFlooredYmd<Duration, Localizer>(arg, options.multiple, localizer_);
+ year_month_day ymd = GetFlooredYmd<Duration, Localizer>(arg, options.multiple,
+ options, localizer_);
auto f = localizer_.ConvertDays(ymd.year() / ymd.month() / 1);
ymd += months{options.multiple};
@@ -1015,8 +1103,8 @@ struct RoundTemporal {
}
case compute::CalendarUnit::QUARTER: {
auto t0 = localizer_.template ConvertTimePoint<Duration>(arg);
- year_month_day ymd =
- GetFlooredYmd<Duration, Localizer>(arg, 3 * options.multiple, localizer_);
+ year_month_day ymd = GetFlooredYmd<Duration, Localizer>(arg, 3 * options.multiple,
+ options, localizer_);
auto f = localizer_.ConvertDays(ymd.year() / ymd.month() / 1);
ymd += months{3 * options.multiple};
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index e74404a771..908bcc0de7 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -882,11 +882,13 @@ cdef CCalendarUnit unwrap_round_temporal_unit(unit) except *:
cdef class _RoundTemporalOptions(FunctionOptions):
- def _set_options(self, multiple, unit, week_starts_monday):
+ def _set_options(self, multiple, unit, week_starts_monday,
+ ceil_is_strictly_greater, calendar_based_origin):
self.wrapped.reset(
new CRoundTemporalOptions(
multiple, unwrap_round_temporal_unit(unit),
- week_starts_monday)
+ week_starts_monday, ceil_is_strictly_greater,
+ calendar_based_origin)
)
@@ -905,10 +907,41 @@ class RoundTemporalOptions(_RoundTemporalOptions):
"nanosecond".
week_starts_monday : bool, default True
If True, weeks start on Monday; if False, on Sunday.
- """
-
- def __init__(self, multiple=1, unit="day", week_starts_monday=True):
- self._set_options(multiple, unit, week_starts_monday)
+ ceil_is_strictly_greater : bool, default False
+ If True, ceil returns a rounded value that is strictly greater than the
+ input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would
+ yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00
+ if set to False.
+ This applies to the ceil_temporal function only.
+ calendar_based_origin : bool, default False
+ By default, the origin is 1970-01-01T00:00:00. By setting this to True,
+ rounding origin will be beginning of one less precise calendar unit.
+ E.g.: rounding to hours will use beginning of day as origin.
+
+ By default time is rounded to a multiple of units since
+ 1970-01-01T00:00:00. By setting calendar_based_origin to true,
+ time will be rounded to number of units since the last greater
+ calendar unit.
+ For example: rounding to multiple of days since the beginning of the
+ month or to hours since the beginning of the day.
+ Exceptions: week and quarter are not used as greater units,
+ therefore days will be rounded to the beginning of the month not
+ week. Greater unit of week is a year.
+ Note that ceiling and rounding might change sorting order of an array
+ near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to
+ 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to
+ YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will
+ ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the
+ order of an already ordered array.
+
+ """
+
+ def __init__(self, multiple=1, unit="day", *, week_starts_monday=True,
+ ceil_is_strictly_greater=False,
+ calendar_based_origin=False):
+ self._set_options(multiple, unit, week_starts_monday,
+ ceil_is_strictly_greater,
+ calendar_based_origin)
cdef class _RoundToMultipleOptions(FunctionOptions):
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index fe93ec9a2f..ba651af50b 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1971,10 +1971,14 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
cdef cppclass CRoundTemporalOptions \
"arrow::compute::RoundTemporalOptions"(CFunctionOptions):
CRoundTemporalOptions(int multiple, CCalendarUnit unit,
- c_bool week_starts_monday)
+ c_bool week_starts_monday,
+ c_bool ceil_is_strictly_greater,
+ c_bool calendar_based_origin)
int multiple
CCalendarUnit unit
c_bool week_starts_monday
+ c_bool ceil_is_strictly_greater
+ c_bool calendar_based_origin
cdef cppclass CRoundToMultipleOptions \
"arrow::compute::RoundToMultipleOptions"(CFunctionOptions):
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 2afee0c2d9..068cea9805 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -153,7 +153,7 @@ def test_option_class_equality():
pc.ReplaceSliceOptions(0, 1, "a"),
pc.ReplaceSubstringOptions("a", "b"),
pc.RoundOptions(2, "towards_infinity"),
- pc.RoundTemporalOptions(1, "second", True),
+ pc.RoundTemporalOptions(1, "second", week_starts_monday=True),
pc.RoundToMultipleOptions(100, "towards_infinity"),
pc.ScalarAggregateOptions(),
pc.SelectKOptions(0, sort_keys=[("b", "ascending")]),
@@ -2038,6 +2038,14 @@ def _check_temporal_rounding(ts, values, unit):
"hour": "H",
"day": "D"
}
+ greater_unit = {
+ "nanosecond": "us",
+ "microsecond": "ms",
+ "millisecond": "s",
+ "second": "min",
+ "minute": "H",
+ "hour": "d",
+ }
ta = pa.array(ts)
for value in values:
@@ -2056,6 +2064,27 @@ def _check_temporal_rounding(ts, values, unit):
expected = ts.dt.round(frequency)
np.testing.assert_array_equal(result, expected)
+ # Check rounding with calendar_based_origin=True.
+ # Note: rounding to month is not supported in Pandas so we can't
+ # approximate this functionality and exclude unit == "day".
+ if unit != "day":
+ options = pc.RoundTemporalOptions(
+ value, unit, calendar_based_origin=True)
+ origin = ts.dt.floor(greater_unit[unit])
+
+ if ta.type.tz is None:
+ result = pc.ceil_temporal(ta, options=options).to_pandas()
+ expected = (ts - origin).dt.ceil(frequency) + origin
+ np.testing.assert_array_equal(result, expected)
+
+ result = pc.floor_temporal(ta, options=options).to_pandas()
+ expected = (ts - origin).dt.floor(frequency) + origin
+ np.testing.assert_array_equal(result, expected)
+
+ result = pc.round_temporal(ta, options=options).to_pandas()
+ expected = (ts - origin).dt.round(frequency) + origin
+ np.testing.assert_array_equal(result, expected)
+
# Check RoundTemporalOptions partial defaults
if unit == "day":
result = pc.ceil_temporal(ta, multiple=value).to_pandas()
@@ -2070,6 +2099,22 @@ def _check_temporal_rounding(ts, values, unit):
expected = ts.dt.round(frequency)
np.testing.assert_array_equal(result, expected)
+ # We naively test ceil_is_strictly_greater by adding time unit multiple
+ # to regular ceiled timestamp if it is equal to the original timestamp.
+ # This does not work if timestamp is zoned since our logic will not
+ # account for DST jumps.
+ if ta.type.tz is None:
+ options = pc.RoundTemporalOptions(
+ value, unit, ceil_is_strictly_greater=True)
+ result = pc.ceil_temporal(ta, options=options)
+ expected = ts.dt.ceil(frequency)
+
+ expected = np.where(
+ expected == ts,
+ expected + pd.Timedelta(value, unit_shorthand[unit]),
+ expected)
+ np.testing.assert_array_equal(result, expected)
+
# Check RoundTemporalOptions defaults
if unit == "day":
frequency = "1D"
@@ -2096,9 +2141,8 @@ def _check_temporal_rounding(ts, values, unit):
def test_round_temporal(unit):
from pyarrow.vendored.version import Version
- if Version(pd.__version__) < Version('1.0.0') and \
- unit in ("nanosecond", "microsecond"):
- pytest.skip('Pandas < 1.0 rounds zoned small units differently.')
+ if Version(pd.__version__) < Version('1.0.0'):
+ pytest.skip('Pandas < 1.0 rounds differently.')
values = (1, 2, 3, 4, 5, 6, 7, 10, 15, 24, 60, 250, 500, 750)
timestamps = [
@@ -2112,6 +2156,7 @@ def test_round_temporal(unit):
"1967-02-26 05:56:46.922376960",
"1975-11-01 10:55:37.016146432",
"1982-01-21 18:43:44.517366784",
+ "1992-01-01 00:00:00.100000000",
"1999-12-04 05:55:34.794991104",
"2026-10-26 08:39:00.316686848"]
ts = pd.Series([pd.Timestamp(x, unit="ns") for x in timestamps])