You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/05/16 13:55:19 UTC

[GitHub] [arrow] pitrou commented on a diff in pull request #12657: ARROW-14821: [C++][R] Implement bindings for lubridate's floor_date, ceiling_date, and round_date

pitrou commented on code in PR #12657:
URL: https://github.com/apache/arrow/pull/12657#discussion_r873732961


##########
python/pyarrow/tests/test_compute.py:
##########
@@ -2085,6 +2085,18 @@ def _check_temporal_rounding(ts, values, unit):
         expected = ts.dt.round(frequency)
         np.testing.assert_array_equal(result, expected)
 
+    if ta.type.tz is None and unit != "day":

Review Comment:
   What happens if `ta.type.tz` is not None, or if `unit` is `"day"`?



##########
cpp/src/arrow/compute/kernels/scalar_temporal_test.cc:
##########
@@ -2632,6 +3117,129 @@ TEST_F(ScalarTemporalTest, TestCeilFloorRoundTemporalBrussels) {
   CheckScalarUnary("round_temporal", unit, times, unit, round_2_hours, &round_to_2_hours);
 }
 
+TEST_F(ScalarTemporalTest, TestRoundTemporalMultipleSinceGreaterUnit) {
+  std::string op = "round_temporal";
+  RoundTemporalOptions round_to_15_nanoseconds =
+      RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, true);
+  RoundTemporalOptions round_to_15_microseconds =
+      RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, true);
+  RoundTemporalOptions round_to_15_milliseconds =
+      RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, true);
+  RoundTemporalOptions round_to_13_seconds =
+      RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, true);
+  RoundTemporalOptions round_to_13_minutes =
+      RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, true);
+  RoundTemporalOptions round_to_15_hours =
+      RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, true);
+  RoundTemporalOptions round_to_15_days =
+      RoundTemporalOptions(15, CalendarUnit::DAY, true, true, true);
+  RoundTemporalOptions round_to_15_weeks =
+      RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, true);
+  RoundTemporalOptions round_to_15_weeks_sunday =
+      RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, true);
+  RoundTemporalOptions round_to_15_months =
+      RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, true);
+  RoundTemporalOptions round_to_15_quarters =
+      RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, true);
+  RoundTemporalOptions round_to_15_years =
+      RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, true);
+
+  const char* round_15_nanosecond =
+      R"(["1970-01-01 00:00:59.123456795", "2000-02-29 23:23:24.000000005",
+          "1899-01-01 00:59:20.001001000", "2033-05-18 03:33:20.000000000",
+          "2020-01-01 01:05:05.001000000", "2019-12-31 02:10:10.002000000",
+          "2019-12-30 03:15:15.003000000", "2009-12-31 04:20:20.004132000",
+          "2010-01-01 05:25:25.005321000", "2010-01-03 06:30:30.006163000",
+          "2010-01-04 07:35:35.000000000", "2006-01-01 08:40:40.000000000",
+          "2005-12-31 09:45:45.000000000", "2008-12-28 00:00:00.000000000",
+          "2008-12-29 00:00:00.000000000", "2012-01-01 01:02:03.000000000", null])";
+  const char* round_15_microsecond =
+      R"(["1970-01-01 00:00:59.123450", "2000-02-29 23:23:24.000005",
+          "1899-01-01 00:59:20.001000", "2033-05-18 03:33:20.000000",
+          "2020-01-01 01:05:05.001000", "2019-12-31 02:10:10.002000",
+          "2019-12-30 03:15:15.003000", "2009-12-31 04:20:20.004135",
+          "2010-01-01 05:25:25.005315", "2010-01-03 06:30:30.006165",
+          "2010-01-04 07:35:35.000000", "2006-01-01 08:40:40.000000",
+          "2005-12-31 09:45:45.000000", "2008-12-28 00:00:00.000000",
+          "2008-12-29 00:00:00.000000", "2012-01-01 01:02:03.000000", null])";
+  const char* round_15_millisecond =
+      R"(["1970-01-01 00:00:59.120", "2000-02-29 23:23:24.005",
+          "1899-01-01 00:59:20.000", "2033-05-18 03:33:20.000",
+          "2020-01-01 01:05:05.000", "2019-12-31 02:10:10.000",
+          "2019-12-30 03:15:15.000", "2009-12-31 04:20:20.000",
+          "2010-01-01 05:25:25.000", "2010-01-03 06:30:30.000",
+          "2010-01-04 07:35:35.000", "2006-01-01 08:40:40.000",
+          "2005-12-31 09:45:45.000", "2008-12-28 00:00:00.000",
+          "2008-12-29 00:00:00.000", "2012-01-01 01:02:03.000", null])";
+  const char* round_13_second =
+      R"(["1970-01-01 00:01:05", "2000-02-29 23:23:26", "1899-01-01 00:59:26",

Review Comment:
   I'm afraid I don't understand. This is rounding to a multiple of 13 seconds since the last minute, right?
   If so, then `23:23:26` is correct (26 is a multiple of 13) but `00:01:05` is not (5 is not a multiple of 13; 65 is, but only when counted from the start of the previous minute).
   



##########
cpp/src/arrow/compute/kernels/scalar_temporal_test.cc:
##########
@@ -2632,6 +3117,129 @@ TEST_F(ScalarTemporalTest, TestCeilFloorRoundTemporalBrussels) {
   CheckScalarUnary("round_temporal", unit, times, unit, round_2_hours, &round_to_2_hours);
 }
 
+TEST_F(ScalarTemporalTest, TestRoundTemporalMultipleSinceGreaterUnit) {
+  std::string op = "round_temporal";
+  RoundTemporalOptions round_to_15_nanoseconds =
+      RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, true);
+  RoundTemporalOptions round_to_15_microseconds =
+      RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, true);
+  RoundTemporalOptions round_to_15_milliseconds =
+      RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, true);
+  RoundTemporalOptions round_to_13_seconds =
+      RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, true);
+  RoundTemporalOptions round_to_13_minutes =
+      RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, true);
+  RoundTemporalOptions round_to_15_hours =
+      RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, true);
+  RoundTemporalOptions round_to_15_days =
+      RoundTemporalOptions(15, CalendarUnit::DAY, true, true, true);
+  RoundTemporalOptions round_to_15_weeks =
+      RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, true);
+  RoundTemporalOptions round_to_15_weeks_sunday =
+      RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, true);
+  RoundTemporalOptions round_to_15_months =
+      RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, true);
+  RoundTemporalOptions round_to_15_quarters =
+      RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, true);
+  RoundTemporalOptions round_to_15_years =
+      RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, true);
+
+  const char* round_15_nanosecond =
+      R"(["1970-01-01 00:00:59.123456795", "2000-02-29 23:23:24.000000005",
+          "1899-01-01 00:59:20.001001000", "2033-05-18 03:33:20.000000000",
+          "2020-01-01 01:05:05.001000000", "2019-12-31 02:10:10.002000000",
+          "2019-12-30 03:15:15.003000000", "2009-12-31 04:20:20.004132000",
+          "2010-01-01 05:25:25.005321000", "2010-01-03 06:30:30.006163000",
+          "2010-01-04 07:35:35.000000000", "2006-01-01 08:40:40.000000000",
+          "2005-12-31 09:45:45.000000000", "2008-12-28 00:00:00.000000000",
+          "2008-12-29 00:00:00.000000000", "2012-01-01 01:02:03.000000000", null])";
+  const char* round_15_microsecond =
+      R"(["1970-01-01 00:00:59.123450", "2000-02-29 23:23:24.000005",
+          "1899-01-01 00:59:20.001000", "2033-05-18 03:33:20.000000",
+          "2020-01-01 01:05:05.001000", "2019-12-31 02:10:10.002000",
+          "2019-12-30 03:15:15.003000", "2009-12-31 04:20:20.004135",
+          "2010-01-01 05:25:25.005315", "2010-01-03 06:30:30.006165",
+          "2010-01-04 07:35:35.000000", "2006-01-01 08:40:40.000000",
+          "2005-12-31 09:45:45.000000", "2008-12-28 00:00:00.000000",
+          "2008-12-29 00:00:00.000000", "2012-01-01 01:02:03.000000", null])";
+  const char* round_15_millisecond =
+      R"(["1970-01-01 00:00:59.120", "2000-02-29 23:23:24.005",
+          "1899-01-01 00:59:20.000", "2033-05-18 03:33:20.000",
+          "2020-01-01 01:05:05.000", "2019-12-31 02:10:10.000",
+          "2019-12-30 03:15:15.000", "2009-12-31 04:20:20.000",
+          "2010-01-01 05:25:25.000", "2010-01-03 06:30:30.000",
+          "2010-01-04 07:35:35.000", "2006-01-01 08:40:40.000",
+          "2005-12-31 09:45:45.000", "2008-12-28 00:00:00.000",
+          "2008-12-29 00:00:00.000", "2012-01-01 01:02:03.000", null])";
+  const char* round_13_second =
+      R"(["1970-01-01 00:01:05", "2000-02-29 23:23:26", "1899-01-01 00:59:26",

Review Comment:
   What are the results with lubridate btw?



##########
python/pyarrow/tests/test_compute.py:
##########
@@ -2111,6 +2122,7 @@ def test_round_temporal(unit):
         "1967-02-26 05:56:46.922376960",
         "1975-11-01 10:55:37.016146432",
         "1982-01-21 18:43:44.517366784",
+        "1992-01-01 00:00:00.100000000",

Review Comment:
   Can we add a dedicated test for `multiple_since_greater_unit`? 



##########
cpp/src/arrow/compute/api_scalar.h:
##########
@@ -117,6 +119,20 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions {
   CalendarUnit unit;
   /// What day does the week start with (Monday=true, Sunday=false)
   bool week_starts_monday;
+  /// Enable this flag to return a rounded value that is strictly greater than the input.
+  /// For example: ceiling 1970-01-01T00:00:00 to 3 hours would yield 1970-01-01T03:00:00
+  /// if set to true and 1970-01-01T00:00:00 if set to false.
+  /// This applies for ceiling only.
+  bool ceil_is_strictly_greater;
+  /// By default time is rounded to a multiple of units since 1970-01-01T00:00:00.
+  /// By setting multiple_since_greater_unit to true, time will be rounded to a number
+  /// of units since the last greater calendar unit.
+  /// For example: rounding to a multiple of days since the beginning of the month or
+  /// to hours since the beginning of the day.
+  /// Exceptions: week and quarter are not used as greater units, therefore days will
+  /// be rounded to the beginning of the month not week. Greater unit of week
+  /// is year.
+  bool multiple_since_greater_unit;

Review Comment:
   I'm not fond of `multiple_since_greater_unit` but I cannot think of a vastly better name and the explanation is clear, so we can probably keep it as is.



##########
python/pyarrow/_compute.pyx:
##########
@@ -901,10 +903,35 @@ class RoundTemporalOptions(_RoundTemporalOptions):
         "nanosecond".
     week_starts_monday : bool, default True
         If True, weeks start on Monday; if False, on Sunday.
-    """
-
-    def __init__(self, multiple=1, unit="day", week_starts_monday=True):
-        self._set_options(multiple, unit, week_starts_monday)
+    ceil_is_strictly_greater : bool, default False
+        If True return a rounded value that is strictly greater than the
+        input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would
+        yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00
+        if set to False.
+        This applies for ceiling only.
+    multiple_since_greater_unit : bool, default False
+        By default origin is 1970-01-01T00:00:00. By setting this to True,
+        rounding origin will be beginning of one less precise calendar unit.
+        E.g.: rounding to hours will use beginning of day as origin.
+
+        By default time is rounded to a multiple of units since
+        1970-01-01T00:00:00. By setting multiple_since_greater_unit to true,
+        time will be rounded to number of units since the last greater
+        calendar unit.
+        For example: rounding to multiple of days since the beginning of the
+        month or to hours since the beginning of the day.
+        Exceptions: week and quarter are not used as greater units,
+        therefor days will will be rounded to the beginning of the month not

Review Comment:
   ```suggestion
           therefore days will be rounded to the beginning of the month not
   ```



##########
cpp/src/arrow/compute/kernels/scalar_temporal_test.cc:
##########
@@ -2632,6 +3117,129 @@ TEST_F(ScalarTemporalTest, TestCeilFloorRoundTemporalBrussels) {
   CheckScalarUnary("round_temporal", unit, times, unit, round_2_hours, &round_to_2_hours);
 }
 
+TEST_F(ScalarTemporalTest, TestRoundTemporalMultipleSinceGreaterUnit) {
+  std::string op = "round_temporal";
+  RoundTemporalOptions round_to_15_nanoseconds =
+      RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, true);
+  RoundTemporalOptions round_to_15_microseconds =
+      RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, true);
+  RoundTemporalOptions round_to_15_milliseconds =
+      RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, true);
+  RoundTemporalOptions round_to_13_seconds =
+      RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, true);
+  RoundTemporalOptions round_to_13_minutes =
+      RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, true);
+  RoundTemporalOptions round_to_15_hours =
+      RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, true);
+  RoundTemporalOptions round_to_15_days =
+      RoundTemporalOptions(15, CalendarUnit::DAY, true, true, true);
+  RoundTemporalOptions round_to_15_weeks =
+      RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, true);
+  RoundTemporalOptions round_to_15_weeks_sunday =
+      RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, true);
+  RoundTemporalOptions round_to_15_months =
+      RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, true);
+  RoundTemporalOptions round_to_15_quarters =
+      RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, true);
+  RoundTemporalOptions round_to_15_years =
+      RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, true);
+
+  const char* round_15_nanosecond =
+      R"(["1970-01-01 00:00:59.123456795", "2000-02-29 23:23:24.000000005",
+          "1899-01-01 00:59:20.001001000", "2033-05-18 03:33:20.000000000",
+          "2020-01-01 01:05:05.001000000", "2019-12-31 02:10:10.002000000",
+          "2019-12-30 03:15:15.003000000", "2009-12-31 04:20:20.004132000",
+          "2010-01-01 05:25:25.005321000", "2010-01-03 06:30:30.006163000",
+          "2010-01-04 07:35:35.000000000", "2006-01-01 08:40:40.000000000",
+          "2005-12-31 09:45:45.000000000", "2008-12-28 00:00:00.000000000",
+          "2008-12-29 00:00:00.000000000", "2012-01-01 01:02:03.000000000", null])";
+  const char* round_15_microsecond =
+      R"(["1970-01-01 00:00:59.123450", "2000-02-29 23:23:24.000005",
+          "1899-01-01 00:59:20.001000", "2033-05-18 03:33:20.000000",
+          "2020-01-01 01:05:05.001000", "2019-12-31 02:10:10.002000",
+          "2019-12-30 03:15:15.003000", "2009-12-31 04:20:20.004135",
+          "2010-01-01 05:25:25.005315", "2010-01-03 06:30:30.006165",
+          "2010-01-04 07:35:35.000000", "2006-01-01 08:40:40.000000",
+          "2005-12-31 09:45:45.000000", "2008-12-28 00:00:00.000000",
+          "2008-12-29 00:00:00.000000", "2012-01-01 01:02:03.000000", null])";
+  const char* round_15_millisecond =
+      R"(["1970-01-01 00:00:59.120", "2000-02-29 23:23:24.005",
+          "1899-01-01 00:59:20.000", "2033-05-18 03:33:20.000",
+          "2020-01-01 01:05:05.000", "2019-12-31 02:10:10.000",
+          "2019-12-30 03:15:15.000", "2009-12-31 04:20:20.000",
+          "2010-01-01 05:25:25.000", "2010-01-03 06:30:30.000",
+          "2010-01-04 07:35:35.000", "2006-01-01 08:40:40.000",
+          "2005-12-31 09:45:45.000", "2008-12-28 00:00:00.000",
+          "2008-12-29 00:00:00.000", "2012-01-01 01:02:03.000", null])";
+  const char* round_13_second =
+      R"(["1970-01-01 00:01:05", "2000-02-29 23:23:26", "1899-01-01 00:59:26",
+          "2033-05-18 03:33:26", "2020-01-01 01:05:00", "2019-12-31 02:10:13",
+          "2019-12-30 03:15:13", "2009-12-31 04:20:26", "2010-01-01 05:25:26",
+          "2010-01-03 06:30:26", "2010-01-04 07:35:39", "2006-01-01 08:40:39",
+          "2005-12-31 09:45:39", "2008-12-28 00:00:00", "2008-12-29 00:00:00",
+          "2012-01-01 01:02:00", null])";
+  const char* round_13_minute =
+      R"(["1970-01-01 00:00:00", "2000-02-29 23:26:00", "1899-01-01 01:05:00",
+          "2033-05-18 03:39:00", "2020-01-01 01:00:00", "2019-12-31 02:13:00",
+          "2019-12-30 03:13:00", "2009-12-31 04:26:00", "2010-01-01 05:26:00",
+          "2010-01-03 06:26:00", "2010-01-04 07:39:00", "2006-01-01 08:39:00",
+          "2005-12-31 09:52:00", "2008-12-28 00:00:00", "2008-12-29 00:00:00",
+          "2012-01-01 01:00:00", null])";
+  const char* round_15_hour =
+      R"(["1970-01-01 00:00:00", "2000-03-01 06:00:00", "1899-01-01 00:00:00",

Review Comment:
   If this is rounding to a multiple of 15 hours since the last day, how can `06:00:00` be correct?



##########
cpp/src/arrow/compute/kernels/scalar_temporal_test.cc:
##########
@@ -2229,6 +2229,367 @@ TEST_F(ScalarTemporalTest, TestCeilTemporal) {
   CheckScalarUnary(op, unit, times, unit, ceil_15_years, &round_to_15_years);
 }
 
+TEST_F(ScalarTemporalTest, TestCeilTemporalStrictCeil) {
+  std::string op = "ceil_temporal";
+  RoundTemporalOptions round_to_1_nanoseconds =
+      RoundTemporalOptions(1, CalendarUnit::NANOSECOND, true, true, false);
+  RoundTemporalOptions round_to_1_microseconds =
+      RoundTemporalOptions(1, CalendarUnit::MICROSECOND, true, true, false);
+  RoundTemporalOptions round_to_1_milliseconds =
+      RoundTemporalOptions(1, CalendarUnit::MILLISECOND, true, true, false);
+  RoundTemporalOptions round_to_1_seconds =
+      RoundTemporalOptions(1, CalendarUnit::SECOND, true, true, false);
+  RoundTemporalOptions round_to_1_minutes =
+      RoundTemporalOptions(1, CalendarUnit::MINUTE, true, true, false);
+  RoundTemporalOptions round_to_1_hours =
+      RoundTemporalOptions(1, CalendarUnit::HOUR, true, true, false);
+  RoundTemporalOptions round_to_1_days =
+      RoundTemporalOptions(1, CalendarUnit::DAY, true, true, false);
+  RoundTemporalOptions round_to_1_weeks =
+      RoundTemporalOptions(1, CalendarUnit::WEEK, true, true, false);
+  RoundTemporalOptions round_to_1_weeks_sunday =
+      RoundTemporalOptions(1, CalendarUnit::WEEK, false, true, false);
+  RoundTemporalOptions round_to_1_months =
+      RoundTemporalOptions(1, CalendarUnit::MONTH, true, true, false);
+  RoundTemporalOptions round_to_1_quarters =
+      RoundTemporalOptions(1, CalendarUnit::QUARTER, true, true, false);
+  RoundTemporalOptions round_to_1_years =
+      RoundTemporalOptions(1, CalendarUnit::YEAR, true, true, false);
+
+  RoundTemporalOptions round_to_15_nanoseconds =
+      RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, false);
+  RoundTemporalOptions round_to_15_microseconds =
+      RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, false);
+  RoundTemporalOptions round_to_15_milliseconds =
+      RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, false);
+  RoundTemporalOptions round_to_13_seconds =
+      RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, false);
+  RoundTemporalOptions round_to_13_minutes =
+      RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, false);
+  RoundTemporalOptions round_to_15_hours =
+      RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, false);
+  RoundTemporalOptions round_to_15_days =
+      RoundTemporalOptions(15, CalendarUnit::DAY, true, true, false);
+  RoundTemporalOptions round_to_15_weeks =
+      RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, false);
+  RoundTemporalOptions round_to_15_weeks_sunday =
+      RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, false);
+  RoundTemporalOptions round_to_15_months =
+      RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, false);
+  RoundTemporalOptions round_to_15_quarters =
+      RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, false);
+  RoundTemporalOptions round_to_15_years =
+      RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, false);
+
+  const char* ceil_1_nanosecond =
+      R"(["1970-01-01 00:00:59.123456790", "2000-02-29 23:23:24.000000000",
+          "1899-01-01 00:59:20.001001002", "2033-05-18 03:33:20.000000001",
+          "2020-01-01 01:05:05.001000001", "2019-12-31 02:10:10.002000001",
+          "2019-12-30 03:15:15.003000001", "2009-12-31 04:20:20.004132001",
+          "2010-01-01 05:25:25.005321001", "2010-01-03 06:30:30.006163001",
+          "2010-01-04 07:35:35.000000001", "2006-01-01 08:40:40.000000001",
+          "2005-12-31 09:45:45.000000001", "2008-12-28 00:00:00.000000001",
+          "2008-12-29 00:00:00.000000001", "2012-01-01 01:02:03.000000001", null])";
+  const char* ceil_1_microsecond =
+      R"(["1970-01-01 00:00:59.123457", "2000-02-29 23:23:24.000000",
+          "1899-01-01 00:59:20.001002", "2033-05-18 03:33:20.000001",
+          "2020-01-01 01:05:05.001001", "2019-12-31 02:10:10.002001",
+          "2019-12-30 03:15:15.003001", "2009-12-31 04:20:20.004133",
+          "2010-01-01 05:25:25.005322", "2010-01-03 06:30:30.006164",
+          "2010-01-04 07:35:35.000001", "2006-01-01 08:40:40.000001",
+          "2005-12-31 09:45:45.000001", "2008-12-28 00:00:00.000001",
+          "2008-12-29 00:00:00.000001", "2012-01-01 01:02:03.000001", null])";
+  const char* ceil_1_millisecond =
+      R"(["1970-01-01 00:00:59.124", "2000-02-29 23:23:24.000",
+          "1899-01-01 00:59:20.002", "2033-05-18 03:33:20.001",
+          "2020-01-01 01:05:05.002", "2019-12-31 02:10:10.003",
+          "2019-12-30 03:15:15.004", "2009-12-31 04:20:20.005",
+          "2010-01-01 05:25:25.006", "2010-01-03 06:30:30.007",
+          "2010-01-04 07:35:35.001", "2006-01-01 08:40:40.001",
+          "2005-12-31 09:45:45.001", "2008-12-28 00:00:00.001",
+          "2008-12-29 00:00:00.001", "2012-01-01 01:02:03.001", null])";
+  const char* ceil_1_second =
+      R"(["1970-01-01 00:01:00", "2000-02-29 23:23:24", "1899-01-01 00:59:21",
+          "2033-05-18 03:33:21", "2020-01-01 01:05:06", "2019-12-31 02:10:11",
+          "2019-12-30 03:15:16", "2009-12-31 04:20:21", "2010-01-01 05:25:26",
+          "2010-01-03 06:30:31", "2010-01-04 07:35:36", "2006-01-01 08:40:41",
+          "2005-12-31 09:45:46", "2008-12-28 00:00:01", "2008-12-29 00:00:01",
+          "2012-01-01 01:02:04", null])";
+  const char* ceil_1_minute =
+      R"(["1970-01-01 00:01:00", "2000-02-29 23:24:00", "1899-01-01 01:00:00",
+             "2033-05-18 03:34:00", "2020-01-01 01:06:00", "2019-12-31 02:11:00",
+             "2019-12-30 03:16:00", "2009-12-31 04:21:00", "2010-01-01 05:26:00",
+             "2010-01-03 06:31:00", "2010-01-04 07:36:00", "2006-01-01 08:41:00",
+             "2005-12-31 09:46:00", "2008-12-28 00:01:00", "2008-12-29 00:01:00",
+             "2012-01-01 01:03:00", null])";
+  const char* ceil_1_hour =
+      R"(["1970-01-01 01:00:00", "2000-03-01 00:00:00", "1899-01-01 01:00:00",
+          "2033-05-18 04:00:00", "2020-01-01 02:00:00", "2019-12-31 03:00:00",
+          "2019-12-30 04:00:00", "2009-12-31 05:00:00", "2010-01-01 06:00:00",
+          "2010-01-03 07:00:00", "2010-01-04 08:00:00", "2006-01-01 09:00:00",
+          "2005-12-31 10:00:00", "2008-12-28 01:00:00", "2008-12-29 01:00:00",
+          "2012-01-01 02:00:00", null])";
+  const char* ceil_1_day =
+      R"(["1970-01-02", "2000-03-01", "1899-01-02", "2033-05-19",
+          "2020-01-02", "2020-01-01", "2019-12-31", "2010-01-01",
+          "2010-01-02", "2010-01-04", "2010-01-05", "2006-01-02",
+          "2006-01-01", "2008-12-29", "2008-12-30", "2012-01-02", null])";
+  const char* ceil_1_weeks =
+      R"(["1970-01-05", "2000-03-06", "1899-01-02", "2033-05-23",
+          "2020-01-06", "2020-01-06", "2020-01-06", "2010-01-04",
+          "2010-01-04", "2010-01-04", "2010-01-11", "2006-01-02",
+          "2006-01-02", "2008-12-29", "2009-01-05", "2012-01-02",  null])";
+  const char* ceil_1_weeks_sunday =
+      R"(["1970-01-04", "2000-03-05", "1899-01-08", "2033-05-22",
+          "2020-01-05", "2020-01-05", "2020-01-05", "2010-01-03",
+          "2010-01-03", "2010-01-10", "2010-01-10", "2006-01-08",
+          "2006-01-01", "2009-01-04", "2009-01-04", "2012-01-08",  null])";
+  const char* ceil_1_months =
+      R"(["1970-02-01", "2000-03-01", "1899-02-01", "2033-06-01",
+          "2020-02-01", "2020-01-01", "2020-01-01", "2010-01-01",
+          "2010-02-01", "2010-02-01", "2010-02-01", "2006-02-01",
+          "2006-01-01", "2009-01-01", "2009-01-01", "2012-02-01", null])";
+  const char* ceil_1_quarters =
+      R"(["1970-04-01", "2000-04-01", "1899-04-01", "2033-07-01",
+          "2020-04-01", "2020-01-01", "2020-01-01", "2010-01-01",
+          "2010-04-01", "2010-04-01", "2010-04-01", "2006-04-01",
+          "2006-01-01", "2009-01-01", "2009-01-01", "2012-04-01", null])";
+  const char* ceil_1_years =
+      R"(["1971-01-01", "2001-01-01", "1900-01-01", "2034-01-01",
+          "2021-01-01", "2020-01-01", "2020-01-01", "2010-01-01",
+          "2011-01-01", "2011-01-01", "2011-01-01", "2007-01-01",
+          "2006-01-01", "2009-01-01", "2009-01-01", "2013-01-01", null])";

Review Comment:
   What does "we will ultimately test this against lubridate" mean here? I do not see any added R tests...



##########
cpp/src/arrow/compute/kernels/scalar_temporal_test.cc:
##########
@@ -2632,6 +3117,129 @@ TEST_F(ScalarTemporalTest, TestCeilFloorRoundTemporalBrussels) {
   CheckScalarUnary("round_temporal", unit, times, unit, round_2_hours, &round_to_2_hours);
 }
 
+TEST_F(ScalarTemporalTest, TestRoundTemporalMultipleSinceGreaterUnit) {
+  std::string op = "round_temporal";
+  RoundTemporalOptions round_to_15_nanoseconds =
+      RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, true);
+  RoundTemporalOptions round_to_15_microseconds =
+      RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, true);
+  RoundTemporalOptions round_to_15_milliseconds =
+      RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, true);
+  RoundTemporalOptions round_to_13_seconds =
+      RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, true);
+  RoundTemporalOptions round_to_13_minutes =
+      RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, true);
+  RoundTemporalOptions round_to_15_hours =
+      RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, true);
+  RoundTemporalOptions round_to_15_days =
+      RoundTemporalOptions(15, CalendarUnit::DAY, true, true, true);
+  RoundTemporalOptions round_to_15_weeks =
+      RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, true);
+  RoundTemporalOptions round_to_15_weeks_sunday =
+      RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, true);
+  RoundTemporalOptions round_to_15_months =
+      RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, true);
+  RoundTemporalOptions round_to_15_quarters =
+      RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, true);
+  RoundTemporalOptions round_to_15_years =
+      RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, true);
+
+  const char* round_15_nanosecond =
+      R"(["1970-01-01 00:00:59.123456795", "2000-02-29 23:23:24.000000005",
+          "1899-01-01 00:59:20.001001000", "2033-05-18 03:33:20.000000000",
+          "2020-01-01 01:05:05.001000000", "2019-12-31 02:10:10.002000000",
+          "2019-12-30 03:15:15.003000000", "2009-12-31 04:20:20.004132000",
+          "2010-01-01 05:25:25.005321000", "2010-01-03 06:30:30.006163000",
+          "2010-01-04 07:35:35.000000000", "2006-01-01 08:40:40.000000000",
+          "2005-12-31 09:45:45.000000000", "2008-12-28 00:00:00.000000000",
+          "2008-12-29 00:00:00.000000000", "2012-01-01 01:02:03.000000000", null])";
+  const char* round_15_microsecond =
+      R"(["1970-01-01 00:00:59.123450", "2000-02-29 23:23:24.000005",
+          "1899-01-01 00:59:20.001000", "2033-05-18 03:33:20.000000",
+          "2020-01-01 01:05:05.001000", "2019-12-31 02:10:10.002000",
+          "2019-12-30 03:15:15.003000", "2009-12-31 04:20:20.004135",
+          "2010-01-01 05:25:25.005315", "2010-01-03 06:30:30.006165",
+          "2010-01-04 07:35:35.000000", "2006-01-01 08:40:40.000000",
+          "2005-12-31 09:45:45.000000", "2008-12-28 00:00:00.000000",
+          "2008-12-29 00:00:00.000000", "2012-01-01 01:02:03.000000", null])";
+  const char* round_15_millisecond =
+      R"(["1970-01-01 00:00:59.120", "2000-02-29 23:23:24.005",
+          "1899-01-01 00:59:20.000", "2033-05-18 03:33:20.000",
+          "2020-01-01 01:05:05.000", "2019-12-31 02:10:10.000",
+          "2019-12-30 03:15:15.000", "2009-12-31 04:20:20.000",
+          "2010-01-01 05:25:25.000", "2010-01-03 06:30:30.000",
+          "2010-01-04 07:35:35.000", "2006-01-01 08:40:40.000",
+          "2005-12-31 09:45:45.000", "2008-12-28 00:00:00.000",
+          "2008-12-29 00:00:00.000", "2012-01-01 01:02:03.000", null])";
+  const char* round_13_second =
+      R"(["1970-01-01 00:01:05", "2000-02-29 23:23:26", "1899-01-01 00:59:26",
+          "2033-05-18 03:33:26", "2020-01-01 01:05:00", "2019-12-31 02:10:13",
+          "2019-12-30 03:15:13", "2009-12-31 04:20:26", "2010-01-01 05:25:26",
+          "2010-01-03 06:30:26", "2010-01-04 07:35:39", "2006-01-01 08:40:39",
+          "2005-12-31 09:45:39", "2008-12-28 00:00:00", "2008-12-29 00:00:00",
+          "2012-01-01 01:02:00", null])";
+  const char* round_13_minute =
+      R"(["1970-01-01 00:00:00", "2000-02-29 23:26:00", "1899-01-01 01:05:00",
+          "2033-05-18 03:39:00", "2020-01-01 01:00:00", "2019-12-31 02:13:00",
+          "2019-12-30 03:13:00", "2009-12-31 04:26:00", "2010-01-01 05:26:00",
+          "2010-01-03 06:26:00", "2010-01-04 07:39:00", "2006-01-01 08:39:00",
+          "2005-12-31 09:52:00", "2008-12-28 00:00:00", "2008-12-29 00:00:00",
+          "2012-01-01 01:00:00", null])";
+  const char* round_15_hour =
+      R"(["1970-01-01 00:00:00", "2000-03-01 06:00:00", "1899-01-01 00:00:00",
+          "2033-05-18 00:00:00", "2020-01-01 00:00:00", "2019-12-31 00:00:00",
+          "2019-12-30 00:00:00", "2009-12-31 00:00:00", "2010-01-01 00:00:00",
+          "2010-01-03 00:00:00", "2010-01-04 15:00:00", "2006-01-01 15:00:00",
+          "2005-12-31 15:00:00", "2008-12-28 00:00:00", "2008-12-29 00:00:00",
+          "2012-01-01 00:00:00", null])";
+  const char* round_15_day =
+      R"(["1970-01-01", "2000-03-02", "1899-01-01", "2033-05-16",
+          "2020-01-01", "2019-12-31", "2019-12-31", "2009-12-31",
+          "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01",
+          "2005-12-31", "2008-12-31", "2008-12-31", "2012-01-01", null])";
+  const char* round_15_weeks =
+      R"(["1969-12-29", "2000-04-17", "1899-01-02", "2033-04-18",
+          "2019-12-30", "2019-12-30", "2019-12-30", "2010-01-04",
+          "2010-01-04", "2010-01-04", "2010-01-04", "2006-01-02",
+          "2006-01-02", "2008-11-10", "2008-12-29", "2012-01-02", null])";
+  const char* round_15_weeks_sunday =
+      R"(["1970-01-04", "2000-04-16", "1899-01-01", "2033-04-17",
+          "2019-12-29", "2019-12-29", "2019-12-29", "2010-01-03",
+          "2010-01-03", "2010-01-03", "2010-01-03", "2006-01-01",
+          "2006-01-01", "2009-01-04", "2009-01-04", "2012-01-01", null])";
+  const char* round_15_months =

Review Comment:
   15 months is not very interesting since it's greater than a year, can you try with e.g. 5 months instead?



##########
cpp/src/arrow/compute/api_scalar.h:
##########
@@ -117,6 +119,20 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions {
   CalendarUnit unit;
   /// What day does the week start with (Monday=true, Sunday=false)
   bool week_starts_monday;
+  /// Enable this flag to return a rounded value that is strictly greater than the input.
+  /// For example: ceiling 1970-01-01T00:00:00 to 3 hours would yield 1970-01-01T03:00:00
+  /// if set to true and 1970-01-01T00:00:00 if set to false.
+  /// This applies for ceiling only.
+  bool ceil_is_strictly_greater;
+  /// By default time is rounded to a multiple of units since 1970-01-01T00:00:00.
+  /// By setting multiple_since_greater_unit to true, time will be rounded to a number
+  /// of units since the last greater calendar unit.
+  /// For example: rounding to a multiple of days since the beginning of the month or
+  /// to hours since the beginning of the day.
+  /// Exceptions: week and quarter are not used as greater units, therefore days will
+  /// be rounded to the beginning of the month not week. Greater unit of week
+  /// is year.
+  bool multiple_since_greater_unit;

Review Comment:
   Perhaps `calendar_based`? cc @jorisvandenbossche 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org