You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/05/05 15:31:13 UTC

[GitHub] [arrow] jorisvandenbossche commented on a diff in pull request #12657: ARROW-14821: [C++][R] Implement bindings for lubridate's floor_date, ceiling_date, and round_date

jorisvandenbossche commented on code in PR #12657:
URL: https://github.com/apache/arrow/pull/12657#discussion_r866026624


##########
cpp/src/arrow/compute/api_scalar.h:
##########
@@ -117,6 +118,13 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions {
   CalendarUnit unit;
   /// What day does the week start with (Monday=true, Sunday=false)
   bool week_starts_monday;
+  /// Times exactly on unit multiple boundary will be rounded one unit multiple up.
+  /// This applies for ceiling only.
+  bool strict_ceil;
+  /// By default origin is 1970-01-01T00:00:00. By setting this to true, rounding origin
+  /// will be beginning of one less precise calendar unit. E.g. rounding to hours will use
+  /// beginning of day as origin.
+  bool calendar_based_origin;

Review Comment:
   To be honest, (before looking at the implementation) this explanation is not really clear to me what the effect exactly is.



##########
cpp/src/arrow/compute/api_scalar.h:
##########
@@ -117,6 +119,13 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions {
   CalendarUnit unit;
   /// What day does the week start with (Monday=true, Sunday=false)
   bool week_starts_monday;
+  /// Times exactly on unit multiple boundary will be rounded one unit multiple up.
+  /// This applies for ceiling only.
+  bool change_on_boundary;

Review Comment:
   Do we know if "strict ceil" is a typical term for this behaviour? 
   (it could also be "ceil_on_boundary" to make it more explicit about "ceil")



##########
cpp/src/arrow/compute/kernels/scalar_temporal_test.cc:
##########
@@ -2229,6 +2229,367 @@ TEST_F(ScalarTemporalTest, TestCeilTemporal) {
   CheckScalarUnary(op, unit, times, unit, ceil_15_years, &round_to_15_years);
 }
 
+TEST_F(ScalarTemporalTest, TestCeilTemporalStrictCeil) {
+  std::string op = "ceil_temporal";
+  RoundTemporalOptions round_to_1_nanoseconds =
+      RoundTemporalOptions(1, CalendarUnit::NANOSECOND, true, true, false);
+  RoundTemporalOptions round_to_1_microseconds =
+      RoundTemporalOptions(1, CalendarUnit::MICROSECOND, true, true, false);
+  RoundTemporalOptions round_to_1_milliseconds =
+      RoundTemporalOptions(1, CalendarUnit::MILLISECOND, true, true, false);
+  RoundTemporalOptions round_to_1_seconds =
+      RoundTemporalOptions(1, CalendarUnit::SECOND, true, true, false);
+  RoundTemporalOptions round_to_1_minutes =
+      RoundTemporalOptions(1, CalendarUnit::MINUTE, true, true, false);
+  RoundTemporalOptions round_to_1_hours =
+      RoundTemporalOptions(1, CalendarUnit::HOUR, true, true, false);
+  RoundTemporalOptions round_to_1_days =
+      RoundTemporalOptions(1, CalendarUnit::DAY, true, true, false);
+  RoundTemporalOptions round_to_1_weeks =
+      RoundTemporalOptions(1, CalendarUnit::WEEK, true, true, false);
+  RoundTemporalOptions round_to_1_weeks_sunday =
+      RoundTemporalOptions(1, CalendarUnit::WEEK, false, true, false);
+  RoundTemporalOptions round_to_1_months =
+      RoundTemporalOptions(1, CalendarUnit::MONTH, true, true, false);
+  RoundTemporalOptions round_to_1_quarters =
+      RoundTemporalOptions(1, CalendarUnit::QUARTER, true, true, false);
+  RoundTemporalOptions round_to_1_years =
+      RoundTemporalOptions(1, CalendarUnit::YEAR, true, true, false);
+
+  RoundTemporalOptions round_to_15_nanoseconds =
+      RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, false);
+  RoundTemporalOptions round_to_15_microseconds =
+      RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, false);
+  RoundTemporalOptions round_to_15_milliseconds =
+      RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, false);
+  RoundTemporalOptions round_to_13_seconds =
+      RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, false);
+  RoundTemporalOptions round_to_13_minutes =
+      RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, false);
+  RoundTemporalOptions round_to_15_hours =
+      RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, false);
+  RoundTemporalOptions round_to_15_days =
+      RoundTemporalOptions(15, CalendarUnit::DAY, true, true, false);
+  RoundTemporalOptions round_to_15_weeks =
+      RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, false);
+  RoundTemporalOptions round_to_15_weeks_sunday =
+      RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, false);
+  RoundTemporalOptions round_to_15_months =
+      RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, false);
+  RoundTemporalOptions round_to_15_quarters =
+      RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, false);
+  RoundTemporalOptions round_to_15_years =
+      RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, false);
+
+  const char* ceil_1_nanosecond =
+      R"(["1970-01-01 00:00:59.123456790", "2000-02-29 23:23:24.000000000",
+          "1899-01-01 00:59:20.001001002", "2033-05-18 03:33:20.000000001",
+          "2020-01-01 01:05:05.001000001", "2019-12-31 02:10:10.002000001",
+          "2019-12-30 03:15:15.003000001", "2009-12-31 04:20:20.004132001",
+          "2010-01-01 05:25:25.005321001", "2010-01-03 06:30:30.006163001",
+          "2010-01-04 07:35:35.000000001", "2006-01-01 08:40:40.000000001",
+          "2005-12-31 09:45:45.000000001", "2008-12-28 00:00:00.000000001",
+          "2008-12-29 00:00:00.000000001", "2012-01-01 01:02:03.000000001", null])";
+  const char* ceil_1_microsecond =
+      R"(["1970-01-01 00:00:59.123457", "2000-02-29 23:23:24.000000",
+          "1899-01-01 00:59:20.001002", "2033-05-18 03:33:20.000001",
+          "2020-01-01 01:05:05.001001", "2019-12-31 02:10:10.002001",
+          "2019-12-30 03:15:15.003001", "2009-12-31 04:20:20.004133",
+          "2010-01-01 05:25:25.005322", "2010-01-03 06:30:30.006164",
+          "2010-01-04 07:35:35.000001", "2006-01-01 08:40:40.000001",
+          "2005-12-31 09:45:45.000001", "2008-12-28 00:00:00.000001",
+          "2008-12-29 00:00:00.000001", "2012-01-01 01:02:03.000001", null])";
+  const char* ceil_1_millisecond =
+      R"(["1970-01-01 00:00:59.124", "2000-02-29 23:23:24.000",
+          "1899-01-01 00:59:20.002", "2033-05-18 03:33:20.001",
+          "2020-01-01 01:05:05.002", "2019-12-31 02:10:10.003",
+          "2019-12-30 03:15:15.004", "2009-12-31 04:20:20.005",
+          "2010-01-01 05:25:25.006", "2010-01-03 06:30:30.007",
+          "2010-01-04 07:35:35.001", "2006-01-01 08:40:40.001",
+          "2005-12-31 09:45:45.001", "2008-12-28 00:00:00.001",
+          "2008-12-29 00:00:00.001", "2012-01-01 01:02:03.001", null])";
+  const char* ceil_1_second =
+      R"(["1970-01-01 00:01:00", "2000-02-29 23:23:24", "1899-01-01 00:59:21",
+          "2033-05-18 03:33:21", "2020-01-01 01:05:06", "2019-12-31 02:10:11",
+          "2019-12-30 03:15:16", "2009-12-31 04:20:21", "2010-01-01 05:25:26",
+          "2010-01-03 06:30:31", "2010-01-04 07:35:36", "2006-01-01 08:40:41",
+          "2005-12-31 09:45:46", "2008-12-28 00:00:01", "2008-12-29 00:00:01",
+          "2012-01-01 01:02:04", null])";
+  const char* ceil_1_minute =
+      R"(["1970-01-01 00:01:00", "2000-02-29 23:24:00", "1899-01-01 01:00:00",
+             "2033-05-18 03:34:00", "2020-01-01 01:06:00", "2019-12-31 02:11:00",
+             "2019-12-30 03:16:00", "2009-12-31 04:21:00", "2010-01-01 05:26:00",
+             "2010-01-03 06:31:00", "2010-01-04 07:36:00", "2006-01-01 08:41:00",
+             "2005-12-31 09:46:00", "2008-12-28 00:01:00", "2008-12-29 00:01:00",
+             "2012-01-01 01:03:00", null])";
+  const char* ceil_1_hour =
+      R"(["1970-01-01 01:00:00", "2000-03-01 00:00:00", "1899-01-01 01:00:00",
+          "2033-05-18 04:00:00", "2020-01-01 02:00:00", "2019-12-31 03:00:00",
+          "2019-12-30 04:00:00", "2009-12-31 05:00:00", "2010-01-01 06:00:00",
+          "2010-01-03 07:00:00", "2010-01-04 08:00:00", "2006-01-01 09:00:00",
+          "2005-12-31 10:00:00", "2008-12-28 01:00:00", "2008-12-29 01:00:00",
+          "2012-01-01 02:00:00", null])";
+  const char* ceil_1_day =
+      R"(["1970-01-02", "2000-03-01", "1899-01-02", "2033-05-19",
+          "2020-01-02", "2020-01-01", "2019-12-31", "2010-01-01",
+          "2010-01-02", "2010-01-04", "2010-01-05", "2006-01-02",
+          "2006-01-01", "2008-12-29", "2008-12-30", "2012-01-02", null])";
+  const char* ceil_1_weeks =
+      R"(["1970-01-05", "2000-03-06", "1899-01-02", "2033-05-23",
+          "2020-01-06", "2020-01-06", "2020-01-06", "2010-01-04",
+          "2010-01-04", "2010-01-04", "2010-01-11", "2006-01-02",
+          "2006-01-02", "2008-12-29", "2009-01-05", "2012-01-02",  null])";
+  const char* ceil_1_weeks_sunday =
+      R"(["1970-01-04", "2000-03-05", "1899-01-08", "2033-05-22",
+          "2020-01-05", "2020-01-05", "2020-01-05", "2010-01-03",
+          "2010-01-03", "2010-01-10", "2010-01-10", "2006-01-08",
+          "2006-01-01", "2009-01-04", "2009-01-04", "2012-01-08",  null])";
+  const char* ceil_1_months =
+      R"(["1970-02-01", "2000-03-01", "1899-02-01", "2033-06-01",
+          "2020-02-01", "2020-01-01", "2020-01-01", "2010-01-01",
+          "2010-02-01", "2010-02-01", "2010-02-01", "2006-02-01",
+          "2006-01-01", "2009-01-01", "2009-01-01", "2012-02-01", null])";
+  const char* ceil_1_quarters =
+      R"(["1970-04-01", "2000-04-01", "1899-04-01", "2033-07-01",
+          "2020-04-01", "2020-01-01", "2020-01-01", "2010-01-01",
+          "2010-04-01", "2010-04-01", "2010-04-01", "2006-04-01",
+          "2006-01-01", "2009-01-01", "2009-01-01", "2012-04-01", null])";
+  const char* ceil_1_years =
+      R"(["1971-01-01", "2001-01-01", "1900-01-01", "2034-01-01",
+          "2021-01-01", "2020-01-01", "2020-01-01", "2010-01-01",
+          "2011-01-01", "2011-01-01", "2011-01-01", "2007-01-01",
+          "2006-01-01", "2009-01-01", "2009-01-01", "2013-01-01", null])";

Review Comment:
   It might not be really a problem, but in many of the cases above, we are not strictly speaking testing the "strict ceil", as `times` doesn't include a date that would hit that case.
   
   (Alternative would be to define a custom `times` here for this function that has more specific cases for what we want to test here? But maybe it is enough that some of the round units cover it)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org