You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/08/13 08:40:16 UTC

[GitHub] [arrow] jorisvandenbossche commented on a change in pull request #10457: ARROW-12980: [C++] Kernels to extract datetime components should be timezone aware

jorisvandenbossche commented on a change in pull request #10457:
URL: https://github.com/apache/arrow/pull/10457#discussion_r688346412



##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
##########
@@ -142,44 +141,211 @@ TEST_F(ScalarTemporalTest, TestTemporalComponentExtractionWithDifferentUnits) {
   }
 }
 
-TEST_F(ScalarTemporalTest, TestZonedTemporalComponentExtraction) {
-  std::string timezone = "Asia/Kolkata";
+TEST_F(ScalarTemporalTest, TestOutsideNanosecondRange) {
+  const char* times = R"(["1677-09-20T00:00:59.123456", "2262-04-13T23:23:23.999999"])";
+
+  auto unit = timestamp(TimeUnit::MICRO);
+  auto iso_calendar_type =
+      struct_({field("iso_year", int64()), field("iso_week", int64()),
+               field("iso_day_of_week", int64())});
+
+  auto year = "[1677, 2262]";
+  auto month = "[9, 4]";
+  auto day = "[20, 13]";
+  auto day_of_week = "[0, 6]";
+  auto day_of_year = "[263, 103]";
+  auto iso_year = "[1677, 2262]";
+  auto iso_week = "[38, 15]";
+  auto iso_calendar =
+      ArrayFromJSON(iso_calendar_type,
+                    R"([{"iso_year": 1677, "iso_week": 38, "iso_day_of_week": 1},
+                          {"iso_year": 2262, "iso_week": 15, "iso_day_of_week": 7}])");
+  auto quarter = "[3, 2]";
+  auto hour = "[0, 23]";
+  auto minute = "[0, 23]";
+  auto second = "[59, 23]";
+  auto millisecond = "[123, 999]";
+  auto microsecond = "[456, 999]";
+  auto nanosecond = "[0, 0]";
+  auto subsecond = "[0.123456, 0.999999]";
+
+  CheckScalarUnary("year", unit, times, int64(), year);
+  CheckScalarUnary("month", unit, times, int64(), month);
+  CheckScalarUnary("day", unit, times, int64(), day);
+  CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week);
+  CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year);
+  CheckScalarUnary("iso_year", unit, times, int64(), iso_year);
+  CheckScalarUnary("iso_week", unit, times, int64(), iso_week);
+  CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times), iso_calendar);
+  CheckScalarUnary("quarter", unit, times, int64(), quarter);
+  CheckScalarUnary("hour", unit, times, int64(), hour);
+  CheckScalarUnary("minute", unit, times, int64(), minute);
+  CheckScalarUnary("second", unit, times, int64(), second);
+  CheckScalarUnary("millisecond", unit, times, int64(), millisecond);
+  CheckScalarUnary("microsecond", unit, times, int64(), microsecond);
+  CheckScalarUnary("nanosecond", unit, times, int64(), nanosecond);
+  CheckScalarUnary("subsecond", unit, times, float64(), subsecond);
+}
+
+#ifndef _WIN32
+// TODO: We should test on windows once ARROW-13168 is resolved.
+TEST_F(ScalarTemporalTest, TestZoned1) {
+  auto unit = timestamp(TimeUnit::NANO, "Pacific/Marquesas");
+  auto iso_calendar_type =
+      struct_({field("iso_year", int64()), field("iso_week", int64()),
+               field("iso_day_of_week", int64())});
+  auto year =
+      "[1969, 2000, 1898, 2033, 2019, 2019, 2019, 2009, 2009, 2010, 2010, 2005, 2005, "
+      "2008, 2008, 2011, null]";
+  auto month = "[12, 2, 12, 5, 12, 12, 12, 12, 12, 1, 1, 12, 12, 12, 12, 12, null]";
+  auto day = "[31, 29, 31, 17, 31, 30, 29, 30, 31, 2, 3, 31, 31, 27, 28, 31, null]";
+  auto day_of_week = "[2, 1, 5, 1, 1, 0, 6, 2, 3, 5, 6, 5, 5, 5, 6, 5, null]";
+  auto day_of_year =
+      "[365, 60, 365, 137, 365, 364, 363, 364, 365, 2, 3, 365, 365, 362, 363, 365, null]";
+  auto iso_year =
+      "[1970, 2000, 1898, 2033, 2020, 2020, 2019, 2009, 2009, 2009, 2009, 2005, 2005, "
+      "2008, 2008, 2011, null]";
+  auto iso_week = "[1, 9, 52, 20, 1, 1, 52, 53, 53, 53, 53, 52, 52, 52, 52, 52, null]";
+  auto iso_calendar =
+      ArrayFromJSON(iso_calendar_type,
+                    R"([{"iso_year": 1970, "iso_week": 1, "iso_day_of_week": 3},
+                        {"iso_year": 2000, "iso_week": 9, "iso_day_of_week": 2},
+                        {"iso_year": 1898, "iso_week": 52, "iso_day_of_week": 6},
+                        {"iso_year": 2033, "iso_week": 20, "iso_day_of_week": 2},
+                        {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 2},
+                        {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 1},
+                        {"iso_year": 2019, "iso_week": 52, "iso_day_of_week": 7},
+                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 3},
+                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 4},
+                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 6},
+                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 7},
+                        {"iso_year": 2005, "iso_week": 52, "iso_day_of_week": 6},
+                        {"iso_year": 2005, "iso_week": 52, "iso_day_of_week": 6},
+                        {"iso_year": 2008, "iso_week": 52, "iso_day_of_week": 6},
+                        {"iso_year": 2008, "iso_week": 52, "iso_day_of_week": 7},
+                        {"iso_year": 2011, "iso_week": 52, "iso_day_of_week": 6}, null])");
+  auto quarter = "[4, 1, 4, 2, 4, 4, 4, 4, 4, 1, 1, 4, 4, 4, 4, 4, null]";
+  auto hour = "[14, 13, 15, 18, 15, 16, 17, 18, 19, 21, 22, 23, 0, 14, 14, 15, null]";
+  auto minute = "[30, 53, 41, 3, 35, 40, 45, 50, 55, 0, 5, 10, 15, 30, 30, 32, null]";
+
+  CheckScalarUnary("year", unit, times, int64(), year);
+  CheckScalarUnary("month", unit, times, int64(), month);
+  CheckScalarUnary("day", unit, times, int64(), day);
+  CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week);
+  CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year);
+  CheckScalarUnary("iso_year", unit, times, int64(), iso_year);
+  CheckScalarUnary("iso_week", unit, times, int64(), iso_week);
+  CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times), iso_calendar);
+  CheckScalarUnary("quarter", unit, times, int64(), quarter);
+  CheckScalarUnary("hour", unit, times, int64(), hour);
+  CheckScalarUnary("minute", unit, times, int64(), minute);
+  CheckScalarUnary("second", unit, times, int64(), second);
+  CheckScalarUnary("millisecond", unit, times, int64(), millisecond);
+  CheckScalarUnary("microsecond", unit, times, int64(), microsecond);
+  CheckScalarUnary("nanosecond", unit, times, int64(), nanosecond);
+  CheckScalarUnary("subsecond", unit, times, float64(), subsecond);
+}
+
+TEST_F(ScalarTemporalTest, TestZoned2) {
+  for (auto u : internal::AllTimeUnits()) {
+    auto unit = timestamp(u, "Australia/Broken_Hill");
+    auto iso_calendar_type =
+        struct_({field("iso_year", int64()), field("iso_week", int64()),
+                 field("iso_day_of_week", int64())});
+    auto year =
+        "[1970, 2000, 1899, 2033, 2020, 2019, 2019, 2009, 2010, 2010, 2010, 2006, 2005, "
+        "2008, 2008, 2012, null]";
+    auto month = "[1, 3, 1, 5, 1, 12, 12, 12, 1, 1, 1, 1, 12, 12, 12, 1, null]";
+    auto day = "[1, 1, 1, 18, 1, 31, 30, 31, 1, 3, 4, 1, 31, 28, 29, 1, null]";
+    auto day_of_week = "[3, 2, 6, 2, 2, 1, 0, 3, 4, 6, 0, 6, 5, 6, 0, 6, null]";
+    auto day_of_year =
+        "[1, 61, 1, 138, 1, 365, 364, 365, 1, 3, 4, 1, 365, 363, 364, 1, null]";
+    auto iso_year =
+        "[1970, 2000, 1898, 2033, 2020, 2020, 2020, 2009, 2009, 2009, 2010, 2005, 2005, "
+        "2008, 2009, 2011, null]";
+    auto iso_week = "[1, 9, 52, 20, 1, 1, 1, 53, 53, 53, 1, 52, 52, 52, 1, 52, null]";
+    auto iso_calendar =
+        ArrayFromJSON(iso_calendar_type,
+                      R"([{"iso_year": 1970, "iso_week": 1, "iso_day_of_week": 4},
+                          {"iso_year": 2000, "iso_week": 9, "iso_day_of_week": 3},
+                          {"iso_year": 1898, "iso_week": 52, "iso_day_of_week": 7},
+                          {"iso_year": 2033, "iso_week": 20, "iso_day_of_week": 3},
+                          {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 3},
+                          {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 2},
+                          {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 1},
+                          {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 4},
+                          {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 5},
+                          {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 7},
+                          {"iso_year": 2010, "iso_week": 1, "iso_day_of_week": 1},
+                          {"iso_year": 2005, "iso_week": 52, "iso_day_of_week": 7},
+                          {"iso_year": 2005, "iso_week": 52, "iso_day_of_week": 6},
+                          {"iso_year": 2008, "iso_week": 52, "iso_day_of_week": 7},
+                          {"iso_year": 2009, "iso_week": 1, "iso_day_of_week": 1},
+                          {"iso_year": 2011, "iso_week": 52, "iso_day_of_week": 7}, null])");
+    auto quarter = "[1, 1, 1, 2, 1, 4, 4, 4, 1, 1, 1, 1, 4, 4, 4, 1, null]";
+    auto hour = "[9, 9, 9, 13, 11, 12, 13, 14, 15, 17, 18, 19, 20, 10, 10, 11, null]";
+    auto minute = "[30, 53, 59, 3, 35, 40, 45, 50, 55, 0, 5, 10, 15, 30, 30, 32, null]";
+
+    CheckScalarUnary("year", unit, times_seconds_precision, int64(), year);
+    CheckScalarUnary("month", unit, times_seconds_precision, int64(), month);
+    CheckScalarUnary("day", unit, times_seconds_precision, int64(), day);
+    CheckScalarUnary("day_of_week", unit, times_seconds_precision, int64(), day_of_week);
+    CheckScalarUnary("day_of_year", unit, times_seconds_precision, int64(), day_of_year);
+    CheckScalarUnary("iso_year", unit, times_seconds_precision, int64(), iso_year);
+    CheckScalarUnary("iso_week", unit, times_seconds_precision, int64(), iso_week);
+    CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times_seconds_precision),
+                     iso_calendar);
+    CheckScalarUnary("quarter", unit, times_seconds_precision, int64(), quarter);
+    CheckScalarUnary("hour", unit, times_seconds_precision, int64(), hour);
+    CheckScalarUnary("minute", unit, times_seconds_precision, int64(), minute);
+    CheckScalarUnary("second", unit, times_seconds_precision, int64(), second);
+    CheckScalarUnary("millisecond", unit, times_seconds_precision, int64(), zeros);
+    CheckScalarUnary("microsecond", unit, times_seconds_precision, int64(), zeros);
+    CheckScalarUnary("nanosecond", unit, times_seconds_precision, int64(), zeros);
+    CheckScalarUnary("subsecond", unit, times_seconds_precision, float64(), zeros);
+  }
+}
+
+TEST_F(ScalarTemporalTest, TestZoned3) {
+  auto data_buffer = Buffer::Wrap(std::vector<int32_t>{1, 2, 3});
+  auto null_buffer = Buffer::FromString("\xff");
 
   for (auto u : internal::AllTimeUnits()) {
-    auto unit = timestamp(u, timezone);
-    auto timestamps = ArrayFromJSON(unit, times_seconds_precision);
-
-    ASSERT_RAISES(NotImplemented, Year(timestamps));
-    ASSERT_RAISES(NotImplemented, Month(timestamps));
-    ASSERT_RAISES(NotImplemented, Day(timestamps));
-    ASSERT_RAISES(NotImplemented, DayOfWeek(timestamps));
-    ASSERT_RAISES(NotImplemented, DayOfYear(timestamps));
-    ASSERT_RAISES(NotImplemented, ISOYear(timestamps));
-    ASSERT_RAISES(NotImplemented, ISOWeek(timestamps));
-    ASSERT_RAISES(NotImplemented, ISOCalendar(timestamps));
-    ASSERT_RAISES(NotImplemented, Quarter(timestamps));
-    ASSERT_RAISES(NotImplemented, Hour(timestamps));
-    ASSERT_RAISES(NotImplemented, Minute(timestamps));
-    ASSERT_RAISES(NotImplemented, Second(timestamps));
-    ASSERT_RAISES(NotImplemented, Millisecond(timestamps));
-    ASSERT_RAISES(NotImplemented, Microsecond(timestamps));
-    ASSERT_RAISES(NotImplemented, Nanosecond(timestamps));
-    ASSERT_RAISES(NotImplemented, Subsecond(timestamps));
+    auto ts_type = timestamp(u, "Mars/Mariner_Valley");

Review comment:
       Can you add a comment here (or embed it in the name of the test) noting that this is a test for an invalid / non-existent timezone?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org