You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/12/21 14:17:39 UTC

[GitHub] [arrow] lidavidm commented on a change in pull request #11818: ARROW-14822: [C++] Implement floor/ceil/round for temporal objects

lidavidm commented on a change in pull request #11818:
URL: https://github.com/apache/arrow/pull/11818#discussion_r769134132



##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
##########
@@ -452,6 +470,341 @@ struct Nanosecond {
   }
 };
 
+// ----------------------------------------------------------------------
+// Round temporal values to given frequency
+
+Result<ValueDescr> ResolveRoundTemporalOutput(KernelContext* ctx,

Review comment:
       Is this not FirstType?

##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -90,6 +90,36 @@ class ARROW_EXPORT RoundOptions : public FunctionOptions {
   RoundMode round_mode;
 };
 
+enum class CalendarUnit : int8_t {
+  NANOSECOND,
+  MICROSECOND,
+  MILLISECOND,
+  SECOND,
+  MINUTE,
+  HOUR,
+  DAY,
+  WEEK,
+  MONTH,
+  SEASON,
+  YEAR
+};
+
+class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions {
+ public:
+
+  explicit RoundTemporalOptions(int multiple = 1, CalendarUnit unit = CalendarUnit::DAY,
+                                int origin = 0);
+  constexpr static char const kTypeName[] = "RoundTemporalOptions";
+  static RoundTemporalOptions Defaults() { return RoundTemporalOptions(); }
+
+  /// Number of units to round to
+  int multiple;
+  /// The unit used for rounding of time
+  CalendarUnit unit;
+  /// Origin time of rounding

Review comment:
       It might just be because it's early, but what exactly is the effect of origin/zero time in this context?

##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -90,6 +90,36 @@ class ARROW_EXPORT RoundOptions : public FunctionOptions {
   RoundMode round_mode;
 };
 
+enum class CalendarUnit : int8_t {
+  NANOSECOND,
+  MICROSECOND,
+  MILLISECOND,
+  SECOND,
+  MINUTE,
+  HOUR,
+  DAY,
+  WEEK,
+  MONTH,
+  SEASON,
+  YEAR
+};
+
+class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions {
+ public:
+
+  explicit RoundTemporalOptions(int multiple = 1, CalendarUnit unit = CalendarUnit::DAY,
+                                int origin = 0);
+  constexpr static char const kTypeName[] = "RoundTemporalOptions";
+  static RoundTemporalOptions Defaults() { return RoundTemporalOptions(); }
+
+  /// Number of units to round to
+  int multiple;
+  /// The unit used for rounding of time
+  CalendarUnit unit;
+  /// Origin time of rounding

Review comment:
       Ah, I see some hardcoded years below. I guess, can we at least mention a unit here?

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
##########
@@ -452,6 +470,341 @@ struct Nanosecond {
   }
 };
 
+// ----------------------------------------------------------------------
+// Round temporal values to given frequency
+
+Result<ValueDescr> ResolveRoundTemporalOutput(KernelContext* ctx,

Review comment:
       Also, this discards shape information (though, what happens when the output shape gets set to ANY?) and seems like it wouldn't work for durations/dates.

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -774,6 +774,48 @@ class RoundOptions(_RoundOptions):
         self._set_options(ndigits, round_mode)
 
 
+cdef CCalendarUnit unwrap_round_unit(unit) except *:
+    if unit == "nanosecond":
+        return CCalendarUnit_NANOSECOND
+    elif unit == "microsecond":
+        return CCalendarUnit_MICROSECOND
+    elif unit == "millisecond":
+        return CCalendarUnit_MILLISECOND
+    elif unit == "second":
+        return CCalendarUnit_SECOND
+    elif unit == "minute":
+        return CCalendarUnit_MINUTE
+    elif unit == "hour":
+        return CCalendarUnit_HOUR
+    elif unit == "day":
+        return CCalendarUnit_DAY
+    elif unit == "week":
+        return CCalendarUnit_WEEK
+    elif unit == "month":
+        return CCalendarUnit_MONTH
+    elif unit == "season":
+        return CCalendarUnit_SEASON
+    elif unit == "year":
+        return CCalendarUnit_YEAR
+    _raise_invalid_function_option(unit, "Calendar unit")

Review comment:
       Hmm, I wonder:
   1) Should we accept the Pandas-equivalent shorthand?
   2) Should we document the valid Python strings somewhere? 

##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -90,6 +90,36 @@ class ARROW_EXPORT RoundOptions : public FunctionOptions {
   RoundMode round_mode;
 };
 
+enum class CalendarUnit : int8_t {
+  NANOSECOND,
+  MICROSECOND,
+  MILLISECOND,
+  SECOND,
+  MINUTE,
+  HOUR,
+  DAY,
+  WEEK,
+  MONTH,
+  SEASON,

Review comment:
       Can we define exactly what SEASON is?

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
##########
@@ -128,6 +137,18 @@ struct TemporalComponentExtractWeek
   }
 };
 
+template <template <typename...> class Op, typename Duration, typename InType,
+          typename OutType>
+struct TemporalComponentExtractWithOptions

Review comment:
       "...WithOptions" makes it sound like this is a generic implementation even though it's only for RoundTemporalOptions, maybe `TemporalComponentExtractRound`? Or template the options type (but that might get annoying/it should then replace the ones for Week and DayOfWeek above).

##########
File path: python/pyarrow/tests/test_compute.py
##########
@@ -1848,6 +1851,60 @@ def test_assume_timezone():
     result.equals(pa.array(expected))
 
 
+units = (
+    "nanosecond",
+    "microsecond",
+    "millisecond",
+    "second",
+    "minute",
+    "hour",
+    "day",
+)
+unit_shorthand = {
+    "nanosecond": "ns",
+    "microsecond": "us",
+    "millisecond": "L",
+    "second": "s",
+    "minute": "min",
+    "hour": "H",
+    "day": "D"
+}
+
+
+@pytest.mark.parametrize('unit', units)
+def test_round_temporal(unit):

Review comment:
       This also needs a Pandas mark.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org