You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/08/02 15:26:49 UTC

[GitHub] [arrow] pitrou commented on a change in pull request #10457: ARROW-12980: [C++] Kernels to extract datetime components should be timezone aware

pitrou commented on a change in pull request #10457:
URL: https://github.com/apache/arrow/pull/10457#discussion_r681043263



##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
##########
@@ -142,29 +141,168 @@ TEST_F(ScalarTemporalTest, TestTemporalComponentExtractionWithDifferentUnits) {
   }
 }
 
-TEST_F(ScalarTemporalTest, TestZonedTemporalComponentExtraction) {
-  std::string timezone = "Asia/Kolkata";
+TEST_F(ScalarTemporalTest, TestOutsideNanosecondRange) {
+  const char* times = R"(["1677-09-20T00:00:59.123456", "2262-04-13T23:23:23.999999"])";
+
+  auto unit = timestamp(TimeUnit::MICRO);
+  auto iso_calendar_type =
+      struct_({field("iso_year", int64()), field("iso_week", int64()),
+               field("iso_day_of_week", int64())});
+
+  auto year = "[1677, 2262]";
+  auto month = "[9, 4]";
+  auto day = "[20, 13]";
+  auto day_of_week = "[0, 6]";
+  auto day_of_year = "[263, 103]";
+  auto iso_year = "[1677, 2262]";
+  auto iso_week = "[38, 15]";
+  auto iso_calendar =
+      ArrayFromJSON(iso_calendar_type,
+                    R"([{"iso_year": 1677, "iso_week": 38, "iso_day_of_week": 1},
+                          {"iso_year": 2262, "iso_week": 15, "iso_day_of_week": 7}])");
+  auto quarter = "[3, 2]";
+  auto hour = "[0, 23]";
+  auto minute = "[0, 23]";
+  auto second = "[59, 23]";
+  auto millisecond = "[123, 999]";
+  auto microsecond = "[456, 999]";
+  auto nanosecond = "[0, 0]";
+  auto subsecond = "[0.123456, 0.999999]";
+
+  CheckScalarUnary("year", unit, times, int64(), year);
+  CheckScalarUnary("month", unit, times, int64(), month);
+  CheckScalarUnary("day", unit, times, int64(), day);
+  CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week);
+  CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year);
+  CheckScalarUnary("iso_year", unit, times, int64(), iso_year);
+  CheckScalarUnary("iso_week", unit, times, int64(), iso_week);
+  CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times), iso_calendar);
+  CheckScalarUnary("quarter", unit, times, int64(), quarter);
+  CheckScalarUnary("hour", unit, times, int64(), hour);
+  CheckScalarUnary("minute", unit, times, int64(), minute);
+  CheckScalarUnary("second", unit, times, int64(), second);
+  CheckScalarUnary("millisecond", unit, times, int64(), millisecond);
+  CheckScalarUnary("microsecond", unit, times, int64(), microsecond);
+  CheckScalarUnary("nanosecond", unit, times, int64(), nanosecond);
+  CheckScalarUnary("subsecond", unit, times, float64(), subsecond);
+}
+
+#ifndef _WIN32
+// TODO: We should test on windows once ARROW-13168 is resolved.
+TEST_F(ScalarTemporalTest, TestZoned1) {
+  auto unit = timestamp(TimeUnit::NANO, "Pacific/Marquesas");

Review comment:
       What happens with a timezone that doesn't exist in the local database? Can you add a test for that?

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal.cc
##########
@@ -187,16 +243,28 @@ struct DayOfYear {
 
 template <typename Duration>
 struct ISOYear {
+  explicit ISOYear(const time_zone* tz = nullptr) : tz_(tz) {}
+
   template <typename T, typename Arg0>
-  static T Call(KernelContext*, Arg0 arg, Status*) {
-    const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
+  T Call(KernelContext*, Arg0 arg, Status*) const {
+    if (tz_ == nullptr) {
+      const auto t = floor<days>(sys_time<Duration>(Duration{arg}));
+      auto y = year_month_day{t + days{3}}.year();
+      auto start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
+      if (t < start) {
+        --y;
+      }
+      return static_cast<T>(static_cast<int32_t>(y));
+    }
+    const auto t = floor<days>(tz_->to_local(sys_time<Duration>(Duration{arg})));
     auto y = year_month_day{t + days{3}}.year();
-    auto start = sys_time<days>((y - years{1}) / dec / thu[last]) + (mon - thu);
+    auto start = local_days((y - years{1}) / dec / thu[last]) + (mon - thu);
     if (t < start) {

Review comment:
       It looks like we would like to be able to write:
   ```c++
   template <typename Duration>
   struct ISOYearExtractor {
     template <typename T, typename TimePointConverter, typename DaysConverter>
     static T Call(Duration d, TimePointConverter&& convert_timepoint, DaysConverter&& convert_days) {
       const auto t = floor<days>(convert_timepoint(sys_time<Duration>(d));
       auto y = year_month_day{t + days{3}}.year();
       auto start = convert_days((y - years{1}) / dec / thu[last]) + (mon - thu);
       if (t < start) {
         --y;
       }
       return static_cast<T>(static_cast<int32_t>(y));
     }
   };
   ```
   
   and then something like:
   ```c++
   
   template <typename Duration, template <typename Duration> ExtractorType>
   struct ExtractorExec {
     using Extractor = ExtractorType<Duration>;
   
     explicit ExtractorExec(const time_zone* tz = nullptr) : tz_(tz) {}
   
     template <typename T, typename Arg0>
     T Call(KernelContext*, Arg0 arg, Status*) const {
       const Duration d{arg};
       if (tz_ == nullptr) {
         return Extractor::Call(arg,
             [](sys_time<Duration> t) { return t; },
             [](sys_days d) { return d; });
       }
       return Extractor::Call(arg,
           [this](sys_time<Duration> t) { return tz_->to_local(t); },
           [](sys_days d) { return local_days(d); });
       }
     };
   };
   
   template <typename Duration>
   using ISOYear = ExtractorExec<Duration, ISOYearExtractor>;
   // same for other extractors...
   ```
   

##########
File path: docs/source/cpp/compute.rst
##########
@@ -1056,7 +1056,8 @@ Temporal component extraction
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 These functions extract datetime components (year, month, day, etc) from timestamp type.
-Note: this is currently not supported for timestamps with timezone information.
+Note: supports timestamps with timezone information and will return localized timestamp
+components.

Review comment:
       Rewrite this as (for example): "If the input timestamps have a non-empty timezone, localized timestamp components will be returned".

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
##########
@@ -216,5 +354,6 @@ TEST_F(ScalarTemporalTest, DayOfWeek) {
                 DayOfWeek(timestamps, DayOfWeekOptions(/*one_based_numbering=*/false,
                                                        /*week_start=*/8)));
 }
+#endif

Review comment:
       You should move this up so that it only encloses tests that don't work on Windows.

##########
File path: docs/source/cpp/compute.rst
##########
@@ -1056,7 +1056,8 @@ Temporal component extraction
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 These functions extract datetime components (year, month, day, etc) from timestamp type.
-Note: this is currently not supported for timestamps with timezone information.
+Note: supports timestamps with timezone information and will return localized timestamp
+components.
 
 +--------------------+------------+-------------------+---------------+----------------------------+-------+
 | Function name      | Arity      | Input types       | Output type   | Options class              | Notes |

Review comment:
       By the way, one question: the table below says "temporal" for the input types, but only timestamp inputs are supported, right?

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal.cc
##########
@@ -399,6 +533,44 @@ struct ISOCalendar {
   }
 };
 
+template <template <typename...> class Op, typename OutType>
+std::shared_ptr<ScalarFunction> MakeTemporalZoned(std::string name,

Review comment:
       I'm sure you can find a way to share this with `MakeTemporal` below.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org