You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/05/10 15:47:00 UTC

[GitHub] [arrow] pitrou commented on a change in pull request #10176: ARROW-11759: [C++] Kernel to extract datetime components (year, month, day, etc) from timestamp type

pitrou commented on a change in pull request #10176:
URL: https://github.com/apache/arrow/pull/10176#discussion_r629456538



##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
##########
@@ -0,0 +1,239 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/formatting.h"
+
+namespace arrow {
+
+using internal::StringFormatter;
+
+// Empty GoogleTest fixture. NOTE(review): the tests visible in this hunk are
+// declared with TEST rather than TEST_F, so this fixture looks unused here.
+class ScalarTemporalTest : public ::testing::Test {};
+
+namespace compute {
+
+// Checks every component-extraction kernel on second-resolution timestamps
+// that carry no timezone and are written as ISO-8601 strings.
+TEST(ScalarTemporalTest, TestSimpleTemporalComponentExtraction) {
+  const char* json =
+      R"(["1970-01-01T00:00:59","2000-02-29T23:23:23",
+          "3989-07-14T18:04:01","1900-01-01T01:59:20","2033-05-18T03:33:20"])";
+  auto time_points = ArrayFromJSON(timestamp(TimeUnit::SECOND), json);
+
+  // Expected kernel outputs: one int64 entry per input timestamp.
+  auto year = ArrayFromJSON(int64(), "[1970, 2000, 3989, 1900, 2033]");
+  auto month = ArrayFromJSON(int64(), "[1, 2, 7, 1, 5]");
+  auto day = ArrayFromJSON(int64(), "[1, 29, 14, 1, 18]");
+  auto day_of_year = ArrayFromJSON(int64(), "[1, 60, 195, 1, 138]");
+  auto week = ArrayFromJSON(int64(), "[1, 9, 28, 1, 20]");
+  auto quarter = ArrayFromJSON(int64(), "[1, 1, 3, 1, 2]");
+  auto day_of_week = ArrayFromJSON(int64(), "[4, 2, 5, 1, 3]");
+  auto hour = ArrayFromJSON(int64(), "[0, 23, 18, 1, 3]");
+  auto minute = ArrayFromJSON(int64(), "[0, 23, 4, 59, 33]");
+  auto second = ArrayFromJSON(int64(), "[59, 23, 1, 20, 20]");
+  // The inputs have whole seconds only, so all sub-second components are zero.
+  auto millisecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+  auto microsecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+  auto nanosecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+
+  // First pass: call the typed C++ entry points (Year, Month, ...) directly.
+  ASSERT_OK_AND_ASSIGN(Datum actual_year, Year(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_month, Month(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_day, Day(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_day_of_year, DayOfYear(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_week, Week(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_quarter, Quarter(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_day_of_week, DayOfWeek(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_hour, Hour(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_minute, Minute(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_second, Second(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_millisecond, Millisecond(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_microsecond, Microsecond(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_nanosecond, Nanosecond(time_points));
+
+  // ASSERT_* is fatal: the first mismatch ends the test, so order matters here.
+  ASSERT_EQ(actual_year, year);
+  ASSERT_EQ(actual_month, month);
+  ASSERT_EQ(actual_day, day);
+  ASSERT_EQ(actual_day_of_year, day_of_year);
+  ASSERT_EQ(actual_week, week);
+  ASSERT_EQ(actual_quarter, quarter);
+  ASSERT_EQ(actual_day_of_week, day_of_week);
+  ASSERT_EQ(actual_hour, hour);
+  ASSERT_EQ(actual_minute, minute);
+  ASSERT_EQ(actual_second, second);
+  ASSERT_EQ(actual_millisecond, millisecond);
+  ASSERT_EQ(actual_microsecond, microsecond);
+  ASSERT_EQ(actual_nanosecond, nanosecond);
+
+  // Second pass: the same expectations, addressed by kernel name through the
+  // CheckScalarUnary test helper.
+  CheckScalarUnary("year", time_points, year);
+  CheckScalarUnary("month", time_points, month);
+  CheckScalarUnary("day", time_points, day);
+  CheckScalarUnary("day_of_year", time_points, day_of_year);
+  CheckScalarUnary("week", time_points, week);
+  CheckScalarUnary("quarter", time_points, quarter);
+  CheckScalarUnary("day_of_week", time_points, day_of_week);
+  CheckScalarUnary("hour", time_points, hour);
+  CheckScalarUnary("minute", time_points, minute);
+  CheckScalarUnary("second", time_points, second);
+  CheckScalarUnary("millisecond", time_points, millisecond);
+  CheckScalarUnary("microsecond", time_points, microsecond);
+  CheckScalarUnary("nanosecond", time_points, nanosecond);
+}
+
+TEST(ScalarTemporalTest, TestTemporalComponentExtraction) {
+  const char* json_second = "[59, 951866603, -2208981640, 2000000000]";
+  const char* json_milli = "[59000, 951866603000, -2208981640000, 2000000000000]";
+  const char* json_micro =
+      "[59000000, 951866603000000, -2208981640000000, 2000000000000000]";
+  const char* json_nano =
+      "[59000000000, 951866603000000000, -2208981640000000000, 2000000000000000000]";
+
+  auto time_points_second = ArrayFromJSON(timestamp(TimeUnit::SECOND), json_second);
+  auto time_points_milli = ArrayFromJSON(timestamp(TimeUnit::MILLI), json_milli);
+  auto time_points_micro = ArrayFromJSON(timestamp(TimeUnit::MICRO), json_micro);
+  auto time_points_nano = ArrayFromJSON(timestamp(TimeUnit::NANO), json_nano);
+
+  auto year = ArrayFromJSON(int64(), "[1970, 2000, 1900, 2033]");
+  auto month = ArrayFromJSON(int64(), "[1, 2, 1, 5]");
+  auto day = ArrayFromJSON(int64(), "[1, 29, 1, 18]");
+  auto day_of_year = ArrayFromJSON(int64(), "[1, 60, 1, 138]");
+  auto week = ArrayFromJSON(int64(), "[1, 9, 1, 20]");
+  auto quarter = ArrayFromJSON(int64(), "[1, 1, 1, 2]");
+  auto day_of_week = ArrayFromJSON(int64(), "[4, 2, 1, 3]");
+  auto hour = ArrayFromJSON(int64(), "[0, 23, 1, 3]");
+  auto minute = ArrayFromJSON(int64(), "[0, 23, 59, 33]");
+  auto second = ArrayFromJSON(int64(), "[59, 23, 20, 20]");
+  auto millisecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]");
+  auto microsecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]");
+  auto nanosecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]");

Review comment:
       Hmm... so you never test with a non-zero number of milli/micro/nanoseconds?
   Also, how about testing with null values as well?

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
##########
@@ -0,0 +1,239 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/formatting.h"
+
+namespace arrow {
+
+using internal::StringFormatter;
+
+// Empty GoogleTest fixture. NOTE(review): the tests visible in this hunk are
+// declared with TEST rather than TEST_F, so this fixture looks unused here.
+class ScalarTemporalTest : public ::testing::Test {};
+
+namespace compute {
+
+// Checks every component-extraction kernel on second-resolution timestamps
+// that carry no timezone and are written as ISO-8601 strings.
+TEST(ScalarTemporalTest, TestSimpleTemporalComponentExtraction) {
+  const char* json =
+      R"(["1970-01-01T00:00:59","2000-02-29T23:23:23",
+          "3989-07-14T18:04:01","1900-01-01T01:59:20","2033-05-18T03:33:20"])";
+  auto time_points = ArrayFromJSON(timestamp(TimeUnit::SECOND), json);
+
+  // Expected kernel outputs: one int64 entry per input timestamp.
+  auto year = ArrayFromJSON(int64(), "[1970, 2000, 3989, 1900, 2033]");
+  auto month = ArrayFromJSON(int64(), "[1, 2, 7, 1, 5]");
+  auto day = ArrayFromJSON(int64(), "[1, 29, 14, 1, 18]");
+  auto day_of_year = ArrayFromJSON(int64(), "[1, 60, 195, 1, 138]");
+  auto week = ArrayFromJSON(int64(), "[1, 9, 28, 1, 20]");
+  auto quarter = ArrayFromJSON(int64(), "[1, 1, 3, 1, 2]");
+  auto day_of_week = ArrayFromJSON(int64(), "[4, 2, 5, 1, 3]");
+  auto hour = ArrayFromJSON(int64(), "[0, 23, 18, 1, 3]");
+  auto minute = ArrayFromJSON(int64(), "[0, 23, 4, 59, 33]");
+  auto second = ArrayFromJSON(int64(), "[59, 23, 1, 20, 20]");
+  // The inputs have whole seconds only, so all sub-second components are zero.
+  auto millisecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+  auto microsecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+  auto nanosecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+
+  // First pass: call the typed C++ entry points (Year, Month, ...) directly.
+  ASSERT_OK_AND_ASSIGN(Datum actual_year, Year(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_month, Month(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_day, Day(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_day_of_year, DayOfYear(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_week, Week(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_quarter, Quarter(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_day_of_week, DayOfWeek(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_hour, Hour(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_minute, Minute(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_second, Second(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_millisecond, Millisecond(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_microsecond, Microsecond(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_nanosecond, Nanosecond(time_points));
+
+  // ASSERT_* is fatal: the first mismatch ends the test, so order matters here.
+  ASSERT_EQ(actual_year, year);
+  ASSERT_EQ(actual_month, month);
+  ASSERT_EQ(actual_day, day);
+  ASSERT_EQ(actual_day_of_year, day_of_year);
+  ASSERT_EQ(actual_week, week);
+  ASSERT_EQ(actual_quarter, quarter);
+  ASSERT_EQ(actual_day_of_week, day_of_week);
+  ASSERT_EQ(actual_hour, hour);
+  ASSERT_EQ(actual_minute, minute);
+  ASSERT_EQ(actual_second, second);
+  ASSERT_EQ(actual_millisecond, millisecond);
+  ASSERT_EQ(actual_microsecond, microsecond);
+  ASSERT_EQ(actual_nanosecond, nanosecond);
+
+  // Second pass: the same expectations, addressed by kernel name through the
+  // CheckScalarUnary test helper.
+  CheckScalarUnary("year", time_points, year);
+  CheckScalarUnary("month", time_points, month);
+  CheckScalarUnary("day", time_points, day);
+  CheckScalarUnary("day_of_year", time_points, day_of_year);
+  CheckScalarUnary("week", time_points, week);
+  CheckScalarUnary("quarter", time_points, quarter);
+  CheckScalarUnary("day_of_week", time_points, day_of_week);
+  CheckScalarUnary("hour", time_points, hour);
+  CheckScalarUnary("minute", time_points, minute);
+  CheckScalarUnary("second", time_points, second);
+  CheckScalarUnary("millisecond", time_points, millisecond);
+  CheckScalarUnary("microsecond", time_points, microsecond);
+  CheckScalarUnary("nanosecond", time_points, nanosecond);
+}
+
+TEST(ScalarTemporalTest, TestTemporalComponentExtraction) {

Review comment:
       Is this test different from the previous one?

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
##########
@@ -0,0 +1,239 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/formatting.h"
+
+namespace arrow {
+
+using internal::StringFormatter;
+
+// Empty GoogleTest fixture. NOTE(review): the tests visible in this hunk are
+// declared with TEST rather than TEST_F, so this fixture looks unused here.
+class ScalarTemporalTest : public ::testing::Test {};
+
+namespace compute {
+
+TEST(ScalarTemporalTest, TestSimpleTemporalComponentExtraction) {
+  const char* json =
+      R"(["1970-01-01T00:00:59","2000-02-29T23:23:23",
+          "3989-07-14T18:04:01","1900-01-01T01:59:20","2033-05-18T03:33:20"])";
+  auto time_points = ArrayFromJSON(timestamp(TimeUnit::SECOND), json);
+
+  auto year = ArrayFromJSON(int64(), "[1970, 2000, 3989, 1900, 2033]");
+  auto month = ArrayFromJSON(int64(), "[1, 2, 7, 1, 5]");
+  auto day = ArrayFromJSON(int64(), "[1, 29, 14, 1, 18]");
+  auto day_of_year = ArrayFromJSON(int64(), "[1, 60, 195, 1, 138]");
+  auto week = ArrayFromJSON(int64(), "[1, 9, 28, 1, 20]");
+  auto quarter = ArrayFromJSON(int64(), "[1, 1, 3, 1, 2]");
+  auto day_of_week = ArrayFromJSON(int64(), "[4, 2, 5, 1, 3]");
+  auto hour = ArrayFromJSON(int64(), "[0, 23, 18, 1, 3]");
+  auto minute = ArrayFromJSON(int64(), "[0, 23, 4, 59, 33]");
+  auto second = ArrayFromJSON(int64(), "[59, 23, 1, 20, 20]");
+  auto millisecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+  auto microsecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+  auto nanosecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");

Review comment:
       Is it desirable for all these values to be `int64`? A number of hours, minutes or seconds should fit in an `int8`, for example.

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
##########
@@ -0,0 +1,239 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/formatting.h"
+
+namespace arrow {
+
+using internal::StringFormatter;
+
+// Empty GoogleTest fixture. NOTE(review): the tests visible in this hunk are
+// declared with TEST rather than TEST_F, so this fixture looks unused here.
+class ScalarTemporalTest : public ::testing::Test {};
+
+namespace compute {
+
+// Checks every component-extraction kernel on second-resolution timestamps
+// that carry no timezone and are written as ISO-8601 strings.
+TEST(ScalarTemporalTest, TestSimpleTemporalComponentExtraction) {
+  const char* json =
+      R"(["1970-01-01T00:00:59","2000-02-29T23:23:23",
+          "3989-07-14T18:04:01","1900-01-01T01:59:20","2033-05-18T03:33:20"])";
+  auto time_points = ArrayFromJSON(timestamp(TimeUnit::SECOND), json);
+
+  // Expected kernel outputs: one int64 entry per input timestamp.
+  auto year = ArrayFromJSON(int64(), "[1970, 2000, 3989, 1900, 2033]");
+  auto month = ArrayFromJSON(int64(), "[1, 2, 7, 1, 5]");
+  auto day = ArrayFromJSON(int64(), "[1, 29, 14, 1, 18]");
+  auto day_of_year = ArrayFromJSON(int64(), "[1, 60, 195, 1, 138]");
+  auto week = ArrayFromJSON(int64(), "[1, 9, 28, 1, 20]");
+  auto quarter = ArrayFromJSON(int64(), "[1, 1, 3, 1, 2]");
+  auto day_of_week = ArrayFromJSON(int64(), "[4, 2, 5, 1, 3]");
+  auto hour = ArrayFromJSON(int64(), "[0, 23, 18, 1, 3]");
+  auto minute = ArrayFromJSON(int64(), "[0, 23, 4, 59, 33]");
+  auto second = ArrayFromJSON(int64(), "[59, 23, 1, 20, 20]");
+  // The inputs have whole seconds only, so all sub-second components are zero.
+  auto millisecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+  auto microsecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+  auto nanosecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+
+  // First pass: call the typed C++ entry points (Year, Month, ...) directly.
+  ASSERT_OK_AND_ASSIGN(Datum actual_year, Year(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_month, Month(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_day, Day(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_day_of_year, DayOfYear(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_week, Week(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_quarter, Quarter(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_day_of_week, DayOfWeek(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_hour, Hour(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_minute, Minute(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_second, Second(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_millisecond, Millisecond(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_microsecond, Microsecond(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_nanosecond, Nanosecond(time_points));
+
+  // ASSERT_* is fatal: the first mismatch ends the test, so order matters here.
+  ASSERT_EQ(actual_year, year);
+  ASSERT_EQ(actual_month, month);
+  ASSERT_EQ(actual_day, day);
+  ASSERT_EQ(actual_day_of_year, day_of_year);
+  ASSERT_EQ(actual_week, week);
+  ASSERT_EQ(actual_quarter, quarter);
+  ASSERT_EQ(actual_day_of_week, day_of_week);
+  ASSERT_EQ(actual_hour, hour);
+  ASSERT_EQ(actual_minute, minute);
+  ASSERT_EQ(actual_second, second);
+  ASSERT_EQ(actual_millisecond, millisecond);
+  ASSERT_EQ(actual_microsecond, microsecond);
+  ASSERT_EQ(actual_nanosecond, nanosecond);
+
+  // Second pass: the same expectations, addressed by kernel name through the
+  // CheckScalarUnary test helper.
+  CheckScalarUnary("year", time_points, year);
+  CheckScalarUnary("month", time_points, month);
+  CheckScalarUnary("day", time_points, day);
+  CheckScalarUnary("day_of_year", time_points, day_of_year);
+  CheckScalarUnary("week", time_points, week);
+  CheckScalarUnary("quarter", time_points, quarter);
+  CheckScalarUnary("day_of_week", time_points, day_of_week);
+  CheckScalarUnary("hour", time_points, hour);
+  CheckScalarUnary("minute", time_points, minute);
+  CheckScalarUnary("second", time_points, second);
+  CheckScalarUnary("millisecond", time_points, millisecond);
+  CheckScalarUnary("microsecond", time_points, microsecond);
+  CheckScalarUnary("nanosecond", time_points, nanosecond);
+}
+
+// Checks component extraction at all four timestamp resolutions using raw
+// integer inputs, then checks the sub-second kernels on non-zero sub-second
+// values for both timezone-naive and "Etc/GMT+2" timestamps.
+TEST(ScalarTemporalTest, TestTemporalComponentExtraction) {
+  // The same four values, scaled by 1000 for each finer resolution.
+  const char* json_second = "[59, 951866603, -2208981640, 2000000000]";
+  const char* json_milli = "[59000, 951866603000, -2208981640000, 2000000000000]";
+  const char* json_micro =
+      "[59000000, 951866603000000, -2208981640000000, 2000000000000000]";
+  const char* json_nano =
+      "[59000000000, 951866603000000000, -2208981640000000000, 2000000000000000000]";
+
+  auto time_points_second = ArrayFromJSON(timestamp(TimeUnit::SECOND), json_second);
+  auto time_points_milli = ArrayFromJSON(timestamp(TimeUnit::MILLI), json_milli);
+  auto time_points_micro = ArrayFromJSON(timestamp(TimeUnit::MICRO), json_micro);
+  auto time_points_nano = ArrayFromJSON(timestamp(TimeUnit::NANO), json_nano);
+
+  // Expected outputs, shared by every resolution.
+  auto year = ArrayFromJSON(int64(), "[1970, 2000, 1900, 2033]");
+  auto month = ArrayFromJSON(int64(), "[1, 2, 1, 5]");
+  auto day = ArrayFromJSON(int64(), "[1, 29, 1, 18]");
+  auto day_of_year = ArrayFromJSON(int64(), "[1, 60, 1, 138]");
+  auto week = ArrayFromJSON(int64(), "[1, 9, 1, 20]");
+  auto quarter = ArrayFromJSON(int64(), "[1, 1, 1, 2]");
+  auto day_of_week = ArrayFromJSON(int64(), "[4, 2, 1, 3]");
+  auto hour = ArrayFromJSON(int64(), "[0, 23, 1, 3]");
+  auto minute = ArrayFromJSON(int64(), "[0, 23, 59, 33]");
+  auto second = ArrayFromJSON(int64(), "[59, 23, 20, 20]");
+  auto millisecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]");
+  auto microsecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]");
+  auto nanosecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]");
+
+  // Bind by const reference so each iteration does not copy a shared_ptr.
+  for (const auto& time_points :
+       {time_points_second, time_points_milli, time_points_micro, time_points_nano}) {
+    CheckScalarUnary("year", time_points, year);
+    CheckScalarUnary("month", time_points, month);
+    CheckScalarUnary("day", time_points, day);
+    CheckScalarUnary("day_of_year", time_points, day_of_year);
+    CheckScalarUnary("week", time_points, week);
+    CheckScalarUnary("quarter", time_points, quarter);
+    CheckScalarUnary("day_of_week", time_points, day_of_week);
+    CheckScalarUnary("hour", time_points, hour);
+    CheckScalarUnary("minute", time_points, minute);
+    CheckScalarUnary("second", time_points, second);
+    CheckScalarUnary("millisecond", time_points, millisecond);
+    CheckScalarUnary("microsecond", time_points, microsecond);
+    CheckScalarUnary("nanosecond", time_points, nanosecond);
+  }
+
+  // Non-zero sub-second inputs, interpreted at each resolution in turn.
+  std::string in = "[123, 999, 1, 31231000]";
+  auto out = ArrayFromJSON(int64(), "[123, 999, 1, 0]");
+
+  auto tp_milli = ArrayFromJSON(timestamp(TimeUnit::MILLI), in);
+  auto tp_milli_zoned = ArrayFromJSON(timestamp(TimeUnit::MILLI, "Etc/GMT+2"), in);
+  CheckScalarUnary("millisecond", tp_milli, out);
+  // Check the zoned array here, mirroring the micro/nano pairs below.
+  CheckScalarUnary("millisecond", tp_milli_zoned, out);
+
+  auto tp_micro = ArrayFromJSON(timestamp(TimeUnit::MICRO), in);
+  auto tp_micro_zoned = ArrayFromJSON(timestamp(TimeUnit::MICRO, "Etc/GMT+2"), in);
+  CheckScalarUnary("microsecond", tp_micro, out);
+  CheckScalarUnary("microsecond", tp_micro_zoned, out);
+
+  auto tp_nano = ArrayFromJSON(timestamp(TimeUnit::NANO), in);
+  auto tp_nano_zoned = ArrayFromJSON(timestamp(TimeUnit::NANO, "Etc/GMT+2"), in);
+  CheckScalarUnary("nanosecond", tp_nano, out);
+  CheckScalarUnary("nanosecond", tp_nano_zoned, out);
+}
+
+// Runs every component-extraction kernel on second-resolution timestamps typed
+// with the "Etc/GMT+2" timezone. Each expectation is written next to the
+// kernel it belongs to.
+TEST(ScalarTemporalTest, TestSimpleZonedTemporalComponentExtraction) {
+  const char* json =
+      R"(["1970-01-01T00:00:59","2000-02-29T23:23:23",
+          "3989-07-14T18:04:01","1900-01-01T01:59:20","2033-05-18T03:33:20"])";
+  auto zoned_points = ArrayFromJSON(timestamp(TimeUnit::SECOND, "Etc/GMT+2"), json);
+
+  // Calendar components.
+  CheckScalarUnary("year", zoned_points,
+                   ArrayFromJSON(int64(), "[1969, 2000, 3989, 1899, 2033]"));
+  CheckScalarUnary("month", zoned_points, ArrayFromJSON(int64(), "[12, 2, 7, 12, 5]"));
+  CheckScalarUnary("day", zoned_points, ArrayFromJSON(int64(), "[31, 29, 14, 31, 18]"));
+  CheckScalarUnary("day_of_year", zoned_points,
+                   ArrayFromJSON(int64(), "[365, 60, 195, 365, 138]"));
+  CheckScalarUnary("week", zoned_points, ArrayFromJSON(int64(), "[1, 9, 28, 52, 20]"));
+  CheckScalarUnary("quarter", zoned_points, ArrayFromJSON(int64(), "[4, 1, 3, 4, 2]"));
+  CheckScalarUnary("day_of_week", zoned_points,
+                   ArrayFromJSON(int64(), "[3, 2, 5, 7, 3]"));
+
+  // Time-of-day components.
+  CheckScalarUnary("hour", zoned_points, ArrayFromJSON(int64(), "[22, 21, 16, 23, 1]"));
+  CheckScalarUnary("minute", zoned_points, ArrayFromJSON(int64(), "[0, 23, 4, 59, 33]"));
+  CheckScalarUnary("second", zoned_points,
+                   ArrayFromJSON(int64(), "[59, 23, 1, 20, 20]"));
+
+  // Sub-second components: the inputs have whole seconds only.
+  CheckScalarUnary("millisecond", zoned_points,
+                   ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]"));
+  CheckScalarUnary("microsecond", zoned_points,
+                   ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]"));
+  CheckScalarUnary("nanosecond", zoned_points,
+                   ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]"));
+}
+
+TEST(ScalarTemporalTest, TestZonedTemporalComponentExtraction) {
+  std::string timezone = "Etc/GMT+2";

Review comment:
       Again, is this testing something different compared to above?

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal.cc
##########
@@ -0,0 +1,632 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/kernels/common.h"
+#include "arrow/util/time.h"
+#include "arrow/vendored/datetime.h"
+
+namespace arrow {
+
+namespace compute {
+namespace internal {
+
+using arrow_vendored::date::days;
+using arrow_vendored::date::floor;
+using arrow_vendored::date::hh_mm_ss;
+using arrow_vendored::date::local_days;
+using arrow_vendored::date::locate_zone;
+using arrow_vendored::date::sys_days;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::trunc;
+using arrow_vendored::date::weeks;
+using arrow_vendored::date::year_month_day;
+using arrow_vendored::date::years;
+using arrow_vendored::date::literals::dec;
+using arrow_vendored::date::literals::jan;
+using arrow_vendored::date::literals::last;
+using arrow_vendored::date::literals::mon;
+using arrow_vendored::date::literals::thu;
+
+// Converts a raw timestamp count, expressed in units of Duration, into a
+// calendar date by flooring the time point to whole days.
+template <typename Duration>
+inline year_month_day ymd_caster_template(const int64_t data) {
+  const auto instant = sys_time<Duration>(Duration{data});
+  const auto day_point = floor<days>(instant);
+  return year_month_day(day_point);
+}
+
+// Builds a converter from a raw timestamp count (in units of Duration) to the
+// calendar date of the corresponding local time in `timezone`.
+//
+// The zone is resolved on every call and captured by the returned closure. A
+// function-local static would be initialized only once per Duration
+// instantiation, so a later call naming a different timezone would silently
+// keep converting with the first zone.
+template <typename Duration>
+inline std::function<year_month_day(const int64_t)> ymd_caster_zoned_template(
+    const std::string timezone) {
+  const arrow_vendored::date::time_zone* tz = locate_zone(timezone);
+  return [tz](const int64_t data) {
+    return year_month_day(floor<days>(tz->to_local(sys_time<Duration>(Duration{data}))));
+  };
+}
+
+// Picks the year/month/day converter that matches a timestamp type.
+//
+// `type` is cast to TimestampType without a runtime check. An empty timezone
+// selects the plain per-unit converter; otherwise the zoned converter for that
+// timezone is built.
+inline std::function<year_month_day(const int64_t)> make_ymd_caster(
+    const std::shared_ptr<DataType> type) {
+  const auto timestamp_type = std::static_pointer_cast<const TimestampType>(type);
+  const TimeUnit::type resolution = timestamp_type->unit();
+  const std::string zone_name = timestamp_type->timezone();
+  const bool has_zone = !zone_name.empty();
+
+  switch (resolution) {
+    case TimeUnit::SECOND:
+      if (has_zone) {
+        return ymd_caster_zoned_template<std::chrono::seconds>(zone_name);
+      }
+      return ymd_caster_template<std::chrono::seconds>;
+    case TimeUnit::MILLI:
+      if (has_zone) {
+        return ymd_caster_zoned_template<std::chrono::milliseconds>(zone_name);
+      }
+      return ymd_caster_template<std::chrono::milliseconds>;
+    case TimeUnit::MICRO:
+      if (has_zone) {
+        return ymd_caster_zoned_template<std::chrono::microseconds>(zone_name);
+      }
+      return ymd_caster_template<std::chrono::microseconds>;
+    case TimeUnit::NANO:
+      if (has_zone) {
+        return ymd_caster_zoned_template<std::chrono::nanoseconds>(zone_name);
+      }
+      return ymd_caster_template<std::chrono::nanoseconds>;
+  }
+  // Reached only for a unit value outside the four enumerators above.
+  return ymd_caster_template<std::chrono::seconds>;
+}
+
+// Extracts the time of day from a raw timestamp count: the count floored to
+// whole days is subtracted and the remainder is expressed in DurationOut.
+template <typename DurationIn, typename DurationOut>
+inline hh_mm_ss<DurationOut> hhmmss_caster_template(const int64_t data) {
+  const DurationIn since_epoch{data};
+  const auto since_midnight = since_epoch - floor<days>(since_epoch);
+  return hh_mm_ss<DurationOut>(std::chrono::duration_cast<DurationOut>(since_midnight));
+}
+
+// Builds a converter from a raw timestamp count (in units of DurationIn) to
+// the local time of day in `timezone`, expressed in DurationOut.
+//
+// The zone is resolved on every call and captured by the returned closure. A
+// function-local static would be initialized only once per template
+// instantiation, so a later call naming a different timezone would silently
+// keep converting with the first zone. The local time is obtained with
+// time_zone::to_local, as in the other zoned helpers of this file.
+template <typename DurationIn, typename DurationOut>
+inline std::function<hh_mm_ss<DurationOut>(const int64_t)> hhmmss_caster_zoned_template(
+    const std::string timezone) {
+  const arrow_vendored::date::time_zone* tz = locate_zone(timezone);
+  return [tz](const int64_t data) {
+    const auto utc = sys_time<DurationIn>(DurationIn{data});
+    const auto local = tz->to_local(utc);
+    // Time elapsed since local midnight.
+    return hh_mm_ss<DurationOut>(
+        std::chrono::duration_cast<DurationOut>(local - floor<days>(local)));
+  };
+}
+
+// Picks the time-of-day converter that matches a timestamp type; the returned
+// callable yields hh_mm_ss values in units of Duration.
+//
+// `type` is cast to TimestampType without a runtime check. An empty timezone
+// selects the plain per-unit converter; otherwise the zoned converter for that
+// timezone is built.
+template <typename Duration>
+inline std::function<hh_mm_ss<Duration>(const int64_t)> make_hhmmss_caster(
+    const std::shared_ptr<DataType> type) {
+  const auto timestamp_type = std::static_pointer_cast<const TimestampType>(type);
+  const TimeUnit::type resolution = timestamp_type->unit();
+  const std::string zone_name = timestamp_type->timezone();
+  const bool has_zone = !zone_name.empty();
+
+  switch (resolution) {
+    case TimeUnit::SECOND:
+      if (has_zone) {
+        return hhmmss_caster_zoned_template<std::chrono::seconds, Duration>(zone_name);
+      }
+      return hhmmss_caster_template<std::chrono::seconds, Duration>;
+    case TimeUnit::MILLI:
+      if (has_zone) {
+        return hhmmss_caster_zoned_template<std::chrono::milliseconds, Duration>(
+            zone_name);
+      }
+      return hhmmss_caster_template<std::chrono::milliseconds, Duration>;
+    case TimeUnit::MICRO:
+      if (has_zone) {
+        return hhmmss_caster_zoned_template<std::chrono::microseconds, Duration>(
+            zone_name);
+      }
+      return hhmmss_caster_template<std::chrono::microseconds, Duration>;
+    case TimeUnit::NANO:
+      if (has_zone) {
+        return hhmmss_caster_zoned_template<std::chrono::nanoseconds, Duration>(
+            zone_name);
+      }
+      return hhmmss_caster_template<std::chrono::nanoseconds, Duration>;
+  }
+  // Reached only for a unit value outside the four enumerators above.
+  return hhmmss_caster_template<std::chrono::seconds, Duration>;
+}
+
+// Returns the day-of-year ordinal for a raw timestamp count given in units of
+// Duration.
+template <typename Duration>
+inline unsigned day_of_year_caster_template(const int64_t data) {
+  const auto today = sys_days{floor<days>(Duration{data})};
+  const auto current_year = year_month_day(today).year();
+  // The reference point is January "day 0" of the same year, not day 1.
+  // NOTE(review): presumably this makes 1 January map to 1 - confirm.
+  const auto reference = sys_days(current_year / jan / 0);
+  const auto ordinal = (today - reference).count();
+  return static_cast<unsigned>(ordinal);
+}
+
+// Builds a converter from a raw timestamp count (in units of Duration) to the
+// day-of-year ordinal of the corresponding local date in `timezone`.
+//
+// The zone is resolved on every call and captured by the returned closure. A
+// function-local static would be initialized only once per Duration
+// instantiation, so a later call naming a different timezone would silently
+// keep converting with the first zone.
+template <typename Duration>
+inline std::function<unsigned(const int64_t)> day_of_year_zoned_caster_template(
+    const std::string timezone) {
+  const arrow_vendored::date::time_zone* tz = locate_zone(timezone);
+  return [tz](const int64_t data) {
+    const auto ld =
+        year_month_day(floor<days>(tz->to_local(sys_time<Duration>(Duration{data}))));
+    // Days elapsed since 1 January of the same year, plus one.
+    return static_cast<unsigned>(
+        (local_days(ld) - local_days(ld.year() / jan / 1) + days{1}).count());
+  };
+}
+
+// Picks the day-of-year converter that matches a timestamp type.
+//
+// `type` is cast to TimestampType without a runtime check. An empty timezone
+// selects the plain per-unit converter; otherwise the zoned converter for that
+// timezone is built.
+inline std::function<unsigned(const int64_t)> get_day_of_year_caster(
+    const std::shared_ptr<DataType> type) {
+  const auto timestamp_type = std::static_pointer_cast<const TimestampType>(type);
+  const TimeUnit::type resolution = timestamp_type->unit();
+  const std::string zone_name = timestamp_type->timezone();
+  const bool has_zone = !zone_name.empty();
+
+  switch (resolution) {
+    case TimeUnit::SECOND:
+      if (has_zone) {
+        return day_of_year_zoned_caster_template<std::chrono::seconds>(zone_name);
+      }
+      return day_of_year_caster_template<std::chrono::seconds>;
+    case TimeUnit::MILLI:
+      if (has_zone) {
+        return day_of_year_zoned_caster_template<std::chrono::milliseconds>(zone_name);
+      }
+      return day_of_year_caster_template<std::chrono::milliseconds>;
+    case TimeUnit::MICRO:
+      if (has_zone) {
+        return day_of_year_zoned_caster_template<std::chrono::microseconds>(zone_name);
+      }
+      return day_of_year_caster_template<std::chrono::microseconds>;
+    case TimeUnit::NANO:
+      if (has_zone) {
+        return day_of_year_zoned_caster_template<std::chrono::nanoseconds>(zone_name);
+      }
+      return day_of_year_caster_template<std::chrono::nanoseconds>;
+  }
+  // Reached only for a unit value outside the four enumerators above.
+  return day_of_year_caster_template<std::chrono::seconds>;
+}
+
+// Returns the 1-based week number for a raw timestamp count given in units of
+// Duration.
+template <typename Duration>
+inline unsigned week_caster_template(const int64_t data) {
+  // Based on
+  // https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1503
+  const auto day_point = sys_days{floor<days>(Duration{data})};
+
+  // Start of week 1 for a given year: the last Thursday of the preceding
+  // December, shifted by (mon - thu).
+  const auto week_one_start = [](const arrow_vendored::date::year yr) {
+    return sys_days((yr - years{1}) / dec / thu[last]) + (mon - thu);
+  };
+
+  // First guess at the week-numbering year: the calendar year three days ahead.
+  auto week_year = year_month_day{day_point + days{3}}.year();
+  auto first_day = week_one_start(week_year);
+  if (day_point < first_day) {
+    // The day falls before that year's week 1, so it belongs to the year before.
+    --week_year;
+    first_day = week_one_start(week_year);
+  }
+  const auto whole_weeks = trunc<weeks>(day_point - first_day).count();
+  return static_cast<unsigned>(whole_weeks + 1);
+}
+
+// Build a week-number extractor for timestamps stored as `Duration` ticks and
+// interpreted in the timezone named `timezone`.
+//
+// The zone is looked up once per call of this factory and captured by the
+// returned callable. (A function-local `static` would be initialised only on the
+// first call of each instantiation, so every later call would silently keep
+// using the first timezone it ever saw.)
+// NOTE(review): locate_zone presumably throws for an unknown name - confirm
+// against the vendored date library and decide how callers should handle it.
+template <typename Duration>
+inline std::function<unsigned(const int64_t)> week_zoned_caster_template(
+    const std::string timezone) {
+  const arrow_vendored::date::time_zone* tz = locate_zone(timezone);
+  return [tz](const int64_t data) {
+    // Local calendar day of the instant.
+    const auto ld = floor<days>(tz->to_local(sys_time<Duration>(Duration{data})));
+    // Same week computation as the timezone-naive variant, on local days.
+    auto y = year_month_day{ld + days{3}}.year();
+    auto start = local_days((y - years{1}) / dec / thu[last]) + (mon - thu);
+    if (ld < start) {
+      --y;
+      start = local_days((y - years{1}) / dec / thu[last]) + (mon - thu);
+    }
+    return static_cast<unsigned>(trunc<weeks>(local_days(ld) - start).count() + 1);
+  };
+}
+
+// Select the week-number extraction function matching a timestamp type.
+//
+// The unit of `type` picks the std::chrono duration used to interpret the raw
+// values; a non-empty timezone picks the zoned variant. `type` is cast to
+// TimestampType without a runtime check.
+inline std::function<unsigned(const int64_t)> make_week_caster(
+    const std::shared_ptr<DataType> type) {
+  const auto ts_type = std::static_pointer_cast<const TimestampType>(type);
+  const TimeUnit::type unit = ts_type->unit();
+  const std::string timezone = ts_type->timezone();
+
+  if (timezone.empty()) {
+    switch (unit) {
+      case TimeUnit::SECOND:
+        return week_caster_template<std::chrono::seconds>;
+      case TimeUnit::MILLI:
+        return week_caster_template<std::chrono::milliseconds>;
+      case TimeUnit::MICRO:
+        return week_caster_template<std::chrono::microseconds>;
+      case TimeUnit::NANO:
+        return week_caster_template<std::chrono::nanoseconds>;
+    }
+  } else {
+    switch (unit) {
+      case TimeUnit::SECOND:
+        return week_zoned_caster_template<std::chrono::seconds>(timezone);
+      case TimeUnit::MILLI:
+        return week_zoned_caster_template<std::chrono::milliseconds>(timezone);
+      case TimeUnit::MICRO:
+        return week_zoned_caster_template<std::chrono::microseconds>(timezone);
+      case TimeUnit::NANO:
+        return week_zoned_caster_template<std::chrono::nanoseconds>(timezone);
+    }
+  }
+  // Only reached when the unit matches none of the cases above. The fallback
+  // must still be a *week* caster, not the day-of-year one.
+  return week_caster_template<std::chrono::seconds>;
+}
+
+// ----------------------------------------------------------------------
+// Extract year from timestamp
+
+// Kernel functor extracting the calendar year from timestamp values.
+//
+// `out_type` is the C type of the output array buffer. NOTE(review): the scalar
+// overload always writes through Int64Scalar, whatever `out_type` is.
+template <typename out_type>
+struct Year {
+  // Scalar input: compute the year of the single timestamp held by `in`.
+  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+    const auto& in_data = internal::UnboxScalar<const TimestampType>::Unbox(in);
+    auto ymd_caster = make_ymd_caster(in.type);
+    checked_cast<Int64Scalar*>(out)->value = static_cast<int>(ymd_caster(in_data).year());
+    return Status::OK();
+  }
+
+  // Array input: compute the year of every slot of `in` into buffer 1 of `out`.
+  static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
+    // The caster takes signed 64-bit values, so read the buffer as int64_t
+    // rather than round-tripping pre-epoch timestamps through uint64_t.
+    auto in_data = in.GetValues<int64_t>(1);
+    auto out_data = out->GetMutableValues<out_type>(1);
+    // Resolve the unit/timezone dispatch once, outside the loop.
+    auto ymd_caster = make_ymd_caster(in.type);
+    for (int64_t i = 0; i < in.length; i++) {
+      out_data[i] = static_cast<int>(ymd_caster(in_data[i]).year());
+    }
+    return Status::OK();
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract month from timestamp
+
+// Kernel functor extracting the month number from timestamp values.
+//
+// `out_type` is the C type of the output array buffer. NOTE(review): the scalar
+// overload always writes through Int64Scalar, whatever `out_type` is.
+template <typename out_type>
+struct Month {
+  // Scalar input: compute the month of the single timestamp held by `in`.
+  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+    const auto& in_data = internal::UnboxScalar<const TimestampType>::Unbox(in);
+    auto ymd_caster = make_ymd_caster(in.type);
+    checked_cast<Int64Scalar*>(out)->value =
+        static_cast<unsigned>(ymd_caster(in_data).month());
+    return Status::OK();
+  }
+
+  // Array input: compute the month of every slot of `in` into buffer 1 of `out`.
+  static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
+    // The caster takes signed 64-bit values, so read the buffer as int64_t
+    // rather than round-tripping pre-epoch timestamps through uint64_t.
+    auto in_data = in.GetValues<int64_t>(1);
+    auto out_data = out->GetMutableValues<out_type>(1);
+    // Resolve the unit/timezone dispatch once, outside the loop.
+    auto ymd_caster = make_ymd_caster(in.type);
+    for (int64_t i = 0; i < in.length; i++) {
+      out_data[i] = static_cast<unsigned>(ymd_caster(in_data[i]).month());
+    }
+    return Status::OK();
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract day from timestamp
+
+// Kernel functor extracting the day of the month from timestamp values.
+//
+// `out_type` is the C type of the output array buffer. NOTE(review): the scalar
+// overload always writes through Int64Scalar, whatever `out_type` is.
+template <typename out_type>
+struct Day {
+  // Scalar input: compute the day of the single timestamp held by `in`.
+  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+    const int64_t& in_data = internal::UnboxScalar<const TimestampType>::Unbox(in);
+    auto ymd_caster = make_ymd_caster(in.type);
+    checked_cast<Int64Scalar*>(out)->value =
+        static_cast<unsigned>(ymd_caster(in_data).day());
+    return Status::OK();
+  }
+
+  // Array input: compute the day of every slot of `in` into buffer 1 of `out`.
+  static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
+    // The caster takes signed 64-bit values, so read the buffer as int64_t
+    // rather than round-tripping pre-epoch timestamps through uint64_t.
+    auto in_data = in.GetValues<int64_t>(1);
+    auto out_data = out->GetMutableValues<out_type>(1);
+    // Resolve the unit/timezone dispatch once, outside the loop.
+    auto ymd_caster = make_ymd_caster(in.type);
+    for (int64_t i = 0; i < in.length; i++) {
+      out_data[i] = static_cast<unsigned>(ymd_caster(in_data[i]).day());
+    }
+    return Status::OK();
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract week from timestamp
+
+// Kernel functor extracting the week number from timestamp values.
+//
+// `out_type` is the C type of the output array buffer. NOTE(review): the scalar
+// overload always writes through Int64Scalar, whatever `out_type` is.
+template <typename out_type>
+struct Week {
+  // Scalar input: compute the week of the single timestamp held by `in`.
+  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+    const auto& in_data = internal::UnboxScalar<const TimestampType>::Unbox(in);
+    auto week_caster = make_week_caster(in.type);
+    checked_cast<Int64Scalar*>(out)->value = week_caster(in_data);
+    return Status::OK();
+  }
+
+  // Array input: compute the week of every slot of `in` into buffer 1 of `out`.
+  static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
+    // The caster takes signed 64-bit values, so read the buffer as int64_t
+    // rather than round-tripping pre-epoch timestamps through uint64_t.
+    auto in_data = in.GetValues<int64_t>(1);
+    auto out_data = out->GetMutableValues<out_type>(1);
+    // Resolve the unit/timezone dispatch once, outside the loop.
+    auto week_caster = make_week_caster(in.type);
+    for (int64_t i = 0; i < in.length; i++) {
+      out_data[i] = week_caster(in_data[i]);
+    }
+    return Status::OK();
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract quarter from timestamp
+
+// Kernel functor extracting the quarter (1 to 4) from timestamp values.
+//
+// `out_type` is the C type of the output array buffer. NOTE(review): the scalar
+// overload always writes through Int64Scalar, whatever `out_type` is.
+template <typename out_type>
+struct Quarter {
+  // Scalar input: compute the quarter of the single timestamp held by `in`.
+  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+    const auto& in_data = internal::UnboxScalar<const TimestampType>::Unbox(in);
+    auto ymd_caster = make_ymd_caster(in.type);
+    // Months 1-3 map to quarter 1, 4-6 to quarter 2, and so on.
+    checked_cast<Int64Scalar*>(out)->value =
+        (static_cast<unsigned>(ymd_caster(in_data).month()) - 1) / 3 + 1;
+    return Status::OK();
+  }
+
+  // Array input: compute the quarter of every slot of `in` into buffer 1 of `out`.
+  static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
+    // The caster takes signed 64-bit values, so read the buffer as int64_t
+    // rather than round-tripping pre-epoch timestamps through uint64_t.
+    auto in_data = in.GetValues<int64_t>(1);
+    auto out_data = out->GetMutableValues<out_type>(1);
+    // Resolve the unit/timezone dispatch once, outside the loop.
+    auto ymd_caster = make_ymd_caster(in.type);
+    for (int64_t i = 0; i < in.length; i++) {
+      out_data[i] = (static_cast<unsigned>(ymd_caster(in_data[i]).month()) - 1) / 3 + 1;
+    }
+    return Status::OK();
+  }
+};

Review comment:
       You're repeating yourself a lot. You basically have specific extraction functions (the "caster" functions) and a generic kernel structure that just applies the extraction function. It should be possible to minimize the amount of code copied and pasted in this file.

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal.cc
##########
@@ -0,0 +1,632 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/kernels/common.h"
+#include "arrow/util/time.h"
+#include "arrow/vendored/datetime.h"
+
+namespace arrow {
+
+namespace compute {
+namespace internal {
+
+using arrow_vendored::date::days;
+using arrow_vendored::date::floor;
+using arrow_vendored::date::hh_mm_ss;
+using arrow_vendored::date::local_days;
+using arrow_vendored::date::locate_zone;
+using arrow_vendored::date::sys_days;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::trunc;
+using arrow_vendored::date::weeks;
+using arrow_vendored::date::year_month_day;
+using arrow_vendored::date::years;
+using arrow_vendored::date::literals::dec;
+using arrow_vendored::date::literals::jan;
+using arrow_vendored::date::literals::last;
+using arrow_vendored::date::literals::mon;
+using arrow_vendored::date::literals::thu;
+
+template <typename Duration>
+inline year_month_day ymd_caster_template(const int64_t data) {

Review comment:
       Please follow the coding convention guidelines, i.e. `CamelCase` for functions.

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal.cc
##########
@@ -0,0 +1,632 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/kernels/common.h"
+#include "arrow/util/time.h"
+#include "arrow/vendored/datetime.h"
+
+namespace arrow {
+
+namespace compute {
+namespace internal {
+
+using arrow_vendored::date::days;
+using arrow_vendored::date::floor;
+using arrow_vendored::date::hh_mm_ss;
+using arrow_vendored::date::local_days;
+using arrow_vendored::date::locate_zone;
+using arrow_vendored::date::sys_days;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::trunc;
+using arrow_vendored::date::weeks;
+using arrow_vendored::date::year_month_day;
+using arrow_vendored::date::years;
+using arrow_vendored::date::literals::dec;
+using arrow_vendored::date::literals::jan;
+using arrow_vendored::date::literals::last;
+using arrow_vendored::date::literals::mon;
+using arrow_vendored::date::literals::thu;
+
+// Convert a timestamp, given as a count of `Duration` ticks, into its calendar
+// date. No timezone adjustment is applied.
+template <typename Duration>
+inline year_month_day ymd_caster_template(const int64_t data) {
+  const sys_time<Duration> time_point{Duration{data}};
+  // Drop the time-of-day part, then convert the day count to year/month/day.
+  return year_month_day(floor<days>(time_point));
+}
+
+// Build a calendar-date extractor for timestamps stored as `Duration` ticks and
+// interpreted in the timezone named `timezone`.
+//
+// The zone is looked up once per call of this factory and captured by the
+// returned callable. (A function-local `static` would be initialised only on the
+// first call of each instantiation, so every later call would silently keep
+// using the first timezone it ever saw.)
+// NOTE(review): locate_zone presumably throws for an unknown name - confirm
+// against the vendored date library and decide how callers should handle it.
+template <typename Duration>
+inline std::function<year_month_day(const int64_t)> ymd_caster_zoned_template(
+    const std::string timezone) {
+  const arrow_vendored::date::time_zone* tz = locate_zone(timezone);
+  return [tz](const int64_t data) {
+    // Shift the instant to local time before truncating to whole days.
+    return year_month_day(floor<days>(tz->to_local(sys_time<Duration>(Duration{data}))));
+  };
+}
+
+// Select the calendar-date extraction function matching a timestamp type.
+//
+// The unit of `type` picks the std::chrono duration used to interpret the raw
+// values; a non-empty timezone picks the zoned variant, built for that timezone
+// name. `type` is cast to TimestampType without a runtime check.
+inline std::function<year_month_day(const int64_t)> make_ymd_caster(
+    const std::shared_ptr<DataType> type) {
+  const auto timestamp_type = std::static_pointer_cast<const TimestampType>(type);
+  const std::string tz_name = timestamp_type->timezone();
+  const TimeUnit::type time_unit = timestamp_type->unit();
+
+  if (!tz_name.empty()) {
+    // Timezone-aware timestamps: values are shifted to local time first.
+    switch (time_unit) {
+      case TimeUnit::SECOND:
+        return ymd_caster_zoned_template<std::chrono::seconds>(tz_name);
+      case TimeUnit::MILLI:
+        return ymd_caster_zoned_template<std::chrono::milliseconds>(tz_name);
+      case TimeUnit::MICRO:
+        return ymd_caster_zoned_template<std::chrono::microseconds>(tz_name);
+      case TimeUnit::NANO:
+        return ymd_caster_zoned_template<std::chrono::nanoseconds>(tz_name);
+    }
+  } else {
+    // Timezone-naive timestamps: values are used as they are.
+    switch (time_unit) {
+      case TimeUnit::SECOND:
+        return ymd_caster_template<std::chrono::seconds>;
+      case TimeUnit::MILLI:
+        return ymd_caster_template<std::chrono::milliseconds>;
+      case TimeUnit::MICRO:
+        return ymd_caster_template<std::chrono::microseconds>;
+      case TimeUnit::NANO:
+        return ymd_caster_template<std::chrono::nanoseconds>;
+    }
+  }
+  // Only reached when the unit matches none of the cases above.
+  return ymd_caster_template<std::chrono::seconds>;
+}
+
+// Split the time-of-day part of a timestamp into hours/minutes/seconds fields.
+//
+// `data` is a count of `DurationIn` ticks; the result is expressed with
+// `DurationOut` precision. No timezone adjustment is applied.
+template <typename DurationIn, typename DurationOut>
+inline hh_mm_ss<DurationOut> hhmmss_caster_template(const int64_t data) {
+  const DurationIn ticks{data};
+  // Remainder after removing the whole days, i.e. the time elapsed since midnight.
+  const auto since_midnight = ticks - floor<days>(ticks);
+  return hh_mm_ss<DurationOut>(std::chrono::duration_cast<DurationOut>(since_midnight));
+}
+
+// Build a time-of-day extractor for timestamps stored as `DurationIn` ticks and
+// interpreted in the timezone named `timezone`; fields are expressed with
+// `DurationOut` precision.
+//
+// The zone is looked up once per call of this factory and captured by the
+// returned callable. (A function-local `static` would be initialised only on the
+// first call of each instantiation, so every later call would silently keep
+// using the first timezone it ever saw.)
+// NOTE(review): locate_zone presumably throws for an unknown name - confirm
+// against the vendored date library and decide how callers should handle it.
+template <typename DurationIn, typename DurationOut>
+inline std::function<hh_mm_ss<DurationOut>(const int64_t)> hhmmss_caster_zoned_template(
+    const std::string timezone) {
+  const arrow_vendored::date::time_zone* tz = locate_zone(timezone);
+  return [tz](const int64_t data) {
+    // Convert to local time the same way the other zoned casters do.
+    const auto l = tz->to_local(sys_time<DurationIn>(DurationIn{data}));
+    // Remainder after removing the whole local days: time since local midnight.
+    return hh_mm_ss<DurationOut>(
+        std::chrono::duration_cast<DurationOut>(l - floor<days>(l)));
+  };
+}
+
+// Select the time-of-day extraction function matching a timestamp type.
+//
+// `Duration` is the precision of the returned hh_mm_ss fields. The unit of
+// `type` picks the std::chrono duration used to interpret the raw values; a
+// non-empty timezone picks the zoned variant, built for that timezone name.
+// `type` is cast to TimestampType without a runtime check.
+template <typename Duration>
+inline std::function<hh_mm_ss<Duration>(const int64_t)> make_hhmmss_caster(
+    const std::shared_ptr<DataType> type) {
+  const auto timestamp_type = std::static_pointer_cast<const TimestampType>(type);
+  const std::string tz_name = timestamp_type->timezone();
+  const TimeUnit::type time_unit = timestamp_type->unit();
+
+  if (!tz_name.empty()) {
+    // Timezone-aware timestamps: values are shifted to local time first.
+    switch (time_unit) {
+      case TimeUnit::SECOND:
+        return hhmmss_caster_zoned_template<std::chrono::seconds, Duration>(tz_name);
+      case TimeUnit::MILLI:
+        return hhmmss_caster_zoned_template<std::chrono::milliseconds, Duration>(
+            tz_name);
+      case TimeUnit::MICRO:
+        return hhmmss_caster_zoned_template<std::chrono::microseconds, Duration>(
+            tz_name);
+      case TimeUnit::NANO:
+        return hhmmss_caster_zoned_template<std::chrono::nanoseconds, Duration>(tz_name);
+    }
+  } else {
+    // Timezone-naive timestamps: values are used as they are.
+    switch (time_unit) {
+      case TimeUnit::SECOND:
+        return hhmmss_caster_template<std::chrono::seconds, Duration>;
+      case TimeUnit::MILLI:
+        return hhmmss_caster_template<std::chrono::milliseconds, Duration>;
+      case TimeUnit::MICRO:
+        return hhmmss_caster_template<std::chrono::microseconds, Duration>;
+      case TimeUnit::NANO:
+        return hhmmss_caster_template<std::chrono::nanoseconds, Duration>;
+    }
+  }
+  // Only reached when the unit matches none of the cases above.
+  return hhmmss_caster_template<std::chrono::seconds, Duration>;
+}
+
+// Compute the 1-based day of the year of a timestamp given as a count of
+// `Duration` ticks. No timezone adjustment is applied.
+template <typename Duration>
+inline unsigned day_of_year_caster_template(const int64_t data) {
+  const auto sd = sys_days{floor<days>(Duration{data})};
+  const auto y = year_month_day(sd).year();
+  // Measure from January 1st and add one, as the zoned variant does, instead of
+  // converting the invalid calendar date "January 0" to a day count.
+  return static_cast<unsigned>((sd - sys_days(y / jan / 1) + days{1}).count());
+}
+
+// Build a day-of-year extractor for timestamps stored as `Duration` ticks and
+// interpreted in the timezone named `timezone`.
+//
+// The zone is looked up once per call of this factory and captured by the
+// returned callable. (A function-local `static` would be initialised only on the
+// first call of each instantiation, so every later call would silently keep
+// using the first timezone it ever saw.)
+// NOTE(review): locate_zone presumably throws for an unknown name - confirm
+// against the vendored date library and decide how callers should handle it.
+template <typename Duration>
+inline std::function<unsigned(const int64_t)> day_of_year_zoned_caster_template(
+    const std::string timezone) {
+  const arrow_vendored::date::time_zone* tz = locate_zone(timezone);
+  return [tz](const int64_t data) {
+    // Local calendar date of the instant.
+    auto ld =
+        year_month_day(floor<days>(tz->to_local(sys_time<Duration>(Duration{data}))));
+    // Days elapsed since January 1st of the same year, made 1-based.
+    return static_cast<unsigned>(
+        (local_days(ld) - local_days(ld.year() / jan / 1) + days{1}).count());
+  };
+}
+
+// Select the day-of-year extraction function matching a timestamp type.
+//
+// The unit of `type` picks the std::chrono duration used to interpret the raw
+// values; a non-empty timezone picks the zoned variant, built for that timezone
+// name. `type` is cast to TimestampType without a runtime check.
+inline std::function<unsigned(const int64_t)> get_day_of_year_caster(
+    const std::shared_ptr<DataType> type) {
+  const auto timestamp_type = std::static_pointer_cast<const TimestampType>(type);
+  const std::string tz_name = timestamp_type->timezone();
+  const TimeUnit::type time_unit = timestamp_type->unit();
+
+  if (!tz_name.empty()) {
+    // Timezone-aware timestamps: values are shifted to local time first.
+    switch (time_unit) {
+      case TimeUnit::SECOND:
+        return day_of_year_zoned_caster_template<std::chrono::seconds>(tz_name);
+      case TimeUnit::MILLI:
+        return day_of_year_zoned_caster_template<std::chrono::milliseconds>(tz_name);
+      case TimeUnit::MICRO:
+        return day_of_year_zoned_caster_template<std::chrono::microseconds>(tz_name);
+      case TimeUnit::NANO:
+        return day_of_year_zoned_caster_template<std::chrono::nanoseconds>(tz_name);
+    }
+  } else {
+    // Timezone-naive timestamps: values are used as they are.
+    switch (time_unit) {
+      case TimeUnit::SECOND:
+        return day_of_year_caster_template<std::chrono::seconds>;
+      case TimeUnit::MILLI:
+        return day_of_year_caster_template<std::chrono::milliseconds>;
+      case TimeUnit::MICRO:
+        return day_of_year_caster_template<std::chrono::microseconds>;
+      case TimeUnit::NANO:
+        return day_of_year_caster_template<std::chrono::nanoseconds>;
+    }
+  }
+  // Only reached when the unit matches none of the cases above.
+  return day_of_year_caster_template<std::chrono::seconds>;
+}
+
+// Compute the week number of a timestamp given as a count of `Duration` ticks.
+//
+// Based on
+// https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1503
+template <typename Duration>
+inline unsigned week_caster_template(const int64_t data) {
+  const auto day_point = sys_days{floor<days>(Duration{data})};
+  // Candidate week-based year: the calendar year three days after `day_point`.
+  auto week_year = year_month_day{day_point + days{3}}.year();
+  // Monday following the last Thursday of the preceding December.
+  auto first_monday = sys_days((week_year - years{1}) / dec / thu[last]) + (mon - thu);
+  if (day_point < first_monday) {
+    // The day lies before that Monday, so it is counted in the previous year.
+    --week_year;
+    first_monday = sys_days((week_year - years{1}) / dec / thu[last]) + (mon - thu);
+  }
+  const auto whole_weeks = trunc<weeks>(day_point - first_monday).count();
+  // Week numbers are 1-based.
+  return static_cast<unsigned>(whole_weeks + 1);
+}
+
+// Build a week-number extractor for timestamps stored as `Duration` ticks and
+// interpreted in the timezone named `timezone`.
+//
+// The zone is looked up once per call of this factory and captured by the
+// returned callable. (A function-local `static` would be initialised only on the
+// first call of each instantiation, so every later call would silently keep
+// using the first timezone it ever saw.)
+// NOTE(review): locate_zone presumably throws for an unknown name - confirm
+// against the vendored date library and decide how callers should handle it.
+template <typename Duration>
+inline std::function<unsigned(const int64_t)> week_zoned_caster_template(
+    const std::string timezone) {
+  const arrow_vendored::date::time_zone* tz = locate_zone(timezone);
+  return [tz](const int64_t data) {
+    // Local calendar day of the instant.
+    const auto ld = floor<days>(tz->to_local(sys_time<Duration>(Duration{data})));
+    // Same week computation as the timezone-naive variant, on local days.
+    auto y = year_month_day{ld + days{3}}.year();
+    auto start = local_days((y - years{1}) / dec / thu[last]) + (mon - thu);
+    if (ld < start) {
+      --y;
+      start = local_days((y - years{1}) / dec / thu[last]) + (mon - thu);
+    }
+    return static_cast<unsigned>(trunc<weeks>(local_days(ld) - start).count() + 1);
+  };
+}
+
+// Select the week-number extraction function matching a timestamp type.
+//
+// The unit of `type` picks the std::chrono duration used to interpret the raw
+// values; a non-empty timezone picks the zoned variant. `type` is cast to
+// TimestampType without a runtime check.
+inline std::function<unsigned(const int64_t)> make_week_caster(
+    const std::shared_ptr<DataType> type) {
+  const auto ts_type = std::static_pointer_cast<const TimestampType>(type);
+  const TimeUnit::type unit = ts_type->unit();
+  const std::string timezone = ts_type->timezone();
+
+  if (timezone.empty()) {
+    switch (unit) {
+      case TimeUnit::SECOND:
+        return week_caster_template<std::chrono::seconds>;
+      case TimeUnit::MILLI:
+        return week_caster_template<std::chrono::milliseconds>;
+      case TimeUnit::MICRO:
+        return week_caster_template<std::chrono::microseconds>;
+      case TimeUnit::NANO:
+        return week_caster_template<std::chrono::nanoseconds>;
+    }
+  } else {
+    switch (unit) {
+      case TimeUnit::SECOND:
+        return week_zoned_caster_template<std::chrono::seconds>(timezone);
+      case TimeUnit::MILLI:
+        return week_zoned_caster_template<std::chrono::milliseconds>(timezone);
+      case TimeUnit::MICRO:
+        return week_zoned_caster_template<std::chrono::microseconds>(timezone);
+      case TimeUnit::NANO:
+        return week_zoned_caster_template<std::chrono::nanoseconds>(timezone);
+    }
+  }
+  // Only reached when the unit matches none of the cases above. The fallback
+  // must still be a *week* caster, not the day-of-year one.
+  return week_caster_template<std::chrono::seconds>;
+}
+
+// ----------------------------------------------------------------------
+// Extract year from timestamp
+
+template <typename out_type>

Review comment:
       As per the coding conventions, this should probably be `OutType`.

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
##########
@@ -0,0 +1,239 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/formatting.h"
+
+namespace arrow {
+
+using internal::StringFormatter;
+
+// Empty gtest fixture; it adds no members or set-up of its own.
+class ScalarTemporalTest : public ::testing::Test {};
+
+namespace compute {
+
+// Checks every temporal component kernel on timezone-naive, second-resolution
+// timestamps parsed from ISO-8601 strings.
+TEST(ScalarTemporalTest, TestSimpleTemporalComponentExtraction) {
+  const char* json =
+      R"(["1970-01-01T00:00:59","2000-02-29T23:23:23",
+          "3989-07-14T18:04:01","1900-01-01T01:59:20","2033-05-18T03:33:20"])";
+  auto time_points = ArrayFromJSON(timestamp(TimeUnit::SECOND), json);
+
+  // Expected component values, one entry per input timestamp.
+  auto year = ArrayFromJSON(int64(), "[1970, 2000, 3989, 1900, 2033]");
+  auto month = ArrayFromJSON(int64(), "[1, 2, 7, 1, 5]");
+  auto day = ArrayFromJSON(int64(), "[1, 29, 14, 1, 18]");
+  auto day_of_year = ArrayFromJSON(int64(), "[1, 60, 195, 1, 138]");
+  auto week = ArrayFromJSON(int64(), "[1, 9, 28, 1, 20]");
+  auto quarter = ArrayFromJSON(int64(), "[1, 1, 3, 1, 2]");
+  auto day_of_week = ArrayFromJSON(int64(), "[4, 2, 5, 1, 3]");
+  auto hour = ArrayFromJSON(int64(), "[0, 23, 18, 1, 3]");
+  auto minute = ArrayFromJSON(int64(), "[0, 23, 4, 59, 33]");
+  auto second = ArrayFromJSON(int64(), "[59, 23, 1, 20, 20]");
+  // The inputs have second resolution, so all sub-second components are zero.
+  auto millisecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+  auto microsecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+  auto nanosecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+
+  // Exercise the convenience functions directly...
+  ASSERT_OK_AND_ASSIGN(Datum actual_year, Year(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_month, Month(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_day, Day(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_day_of_year, DayOfYear(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_week, Week(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_quarter, Quarter(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_day_of_week, DayOfWeek(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_hour, Hour(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_minute, Minute(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_second, Second(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_millisecond, Millisecond(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_microsecond, Microsecond(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_nanosecond, Nanosecond(time_points));
+
+  ASSERT_EQ(actual_year, year);
+  ASSERT_EQ(actual_month, month);
+  ASSERT_EQ(actual_day, day);
+  ASSERT_EQ(actual_day_of_year, day_of_year);
+  ASSERT_EQ(actual_week, week);
+  ASSERT_EQ(actual_quarter, quarter);
+  ASSERT_EQ(actual_day_of_week, day_of_week);
+  ASSERT_EQ(actual_hour, hour);
+  ASSERT_EQ(actual_minute, minute);
+  ASSERT_EQ(actual_second, second);
+  ASSERT_EQ(actual_millisecond, millisecond);
+  ASSERT_EQ(actual_microsecond, microsecond);
+  ASSERT_EQ(actual_nanosecond, nanosecond);
+
+  // ...and the same kernels looked up by their registered function names.
+  CheckScalarUnary("year", time_points, year);
+  CheckScalarUnary("month", time_points, month);
+  CheckScalarUnary("day", time_points, day);
+  CheckScalarUnary("day_of_year", time_points, day_of_year);
+  CheckScalarUnary("week", time_points, week);
+  CheckScalarUnary("quarter", time_points, quarter);
+  CheckScalarUnary("day_of_week", time_points, day_of_week);
+  CheckScalarUnary("hour", time_points, hour);
+  CheckScalarUnary("minute", time_points, minute);
+  CheckScalarUnary("second", time_points, second);
+  CheckScalarUnary("millisecond", time_points, millisecond);
+  CheckScalarUnary("microsecond", time_points, microsecond);
+  CheckScalarUnary("nanosecond", time_points, nanosecond);
+}
+
+// Checks that every component kernel gives the same answers for the same
+// instants expressed in each of the four time units, then checks the sub-second
+// kernels on values that actually carry sub-second parts, with and without a
+// timezone.
+TEST(ScalarTemporalTest, TestTemporalComponentExtraction) {
+  // The same four instants, scaled to each unit.
+  const char* json_second = "[59, 951866603, -2208981640, 2000000000]";
+  const char* json_milli = "[59000, 951866603000, -2208981640000, 2000000000000]";
+  const char* json_micro =
+      "[59000000, 951866603000000, -2208981640000000, 2000000000000000]";
+  const char* json_nano =
+      "[59000000000, 951866603000000000, -2208981640000000000, 2000000000000000000]";
+
+  auto time_points_second = ArrayFromJSON(timestamp(TimeUnit::SECOND), json_second);
+  auto time_points_milli = ArrayFromJSON(timestamp(TimeUnit::MILLI), json_milli);
+  auto time_points_micro = ArrayFromJSON(timestamp(TimeUnit::MICRO), json_micro);
+  auto time_points_nano = ArrayFromJSON(timestamp(TimeUnit::NANO), json_nano);
+
+  // Expected component values, one entry per instant.
+  auto year = ArrayFromJSON(int64(), "[1970, 2000, 1900, 2033]");
+  auto month = ArrayFromJSON(int64(), "[1, 2, 1, 5]");
+  auto day = ArrayFromJSON(int64(), "[1, 29, 1, 18]");
+  auto day_of_year = ArrayFromJSON(int64(), "[1, 60, 1, 138]");
+  auto week = ArrayFromJSON(int64(), "[1, 9, 1, 20]");
+  auto quarter = ArrayFromJSON(int64(), "[1, 1, 1, 2]");
+  auto day_of_week = ArrayFromJSON(int64(), "[4, 2, 1, 3]");
+  auto hour = ArrayFromJSON(int64(), "[0, 23, 1, 3]");
+  auto minute = ArrayFromJSON(int64(), "[0, 23, 59, 33]");
+  auto second = ArrayFromJSON(int64(), "[59, 23, 20, 20]");
+  auto millisecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]");
+  auto microsecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]");
+  auto nanosecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]");
+
+  for (auto time_points :
+       {time_points_second, time_points_milli, time_points_micro, time_points_nano}) {
+    CheckScalarUnary("year", time_points, year);
+    CheckScalarUnary("month", time_points, month);
+    CheckScalarUnary("day", time_points, day);
+    CheckScalarUnary("day_of_year", time_points, day_of_year);
+    CheckScalarUnary("week", time_points, week);
+    CheckScalarUnary("quarter", time_points, quarter);
+    CheckScalarUnary("day_of_week", time_points, day_of_week);
+    CheckScalarUnary("hour", time_points, hour);
+    CheckScalarUnary("minute", time_points, minute);
+    CheckScalarUnary("second", time_points, second);
+    CheckScalarUnary("millisecond", time_points, millisecond);
+    CheckScalarUnary("microsecond", time_points, microsecond);
+    CheckScalarUnary("nanosecond", time_points, nanosecond);
+  }
+
+  // Raw tick counts with non-zero sub-second parts; each kernel below is fed
+  // these in its own unit, so the expected component equals the raw value
+  // modulo 1000.
+  std::string in = "[123, 999, 1, 31231000]";
+  auto out = ArrayFromJSON(int64(), "[123, 999, 1, 0]");
+
+  auto tp_milli = ArrayFromJSON(timestamp(TimeUnit::MILLI), in);
+  auto tp_milli_zoned = ArrayFromJSON(timestamp(TimeUnit::MILLI, "Etc/GMT+2"), in);
+  CheckScalarUnary("millisecond", tp_milli, out);
+  // The zoned input must be checked too (the naive one used to be checked twice).
+  CheckScalarUnary("millisecond", tp_milli_zoned, out);
+
+  auto tp_micro = ArrayFromJSON(timestamp(TimeUnit::MICRO), in);
+  auto tp_micro_zoned = ArrayFromJSON(timestamp(TimeUnit::MICRO, "Etc/GMT+2"), in);
+  CheckScalarUnary("microsecond", tp_micro, out);
+  CheckScalarUnary("microsecond", tp_micro_zoned, out);
+
+  auto tp_nano = ArrayFromJSON(timestamp(TimeUnit::NANO), in);
+  auto tp_nano_zoned = ArrayFromJSON(timestamp(TimeUnit::NANO, "Etc/GMT+2"), in);
+  CheckScalarUnary("nanosecond", tp_nano, out);
+  CheckScalarUnary("nanosecond", tp_nano_zoned, out);
+}
+
+TEST(ScalarTemporalTest, TestSimpleZonedTemporalComponentExtraction) {
+  const char* json =
+      R"(["1970-01-01T00:00:59","2000-02-29T23:23:23",
+          "3989-07-14T18:04:01","1900-01-01T01:59:20","2033-05-18T03:33:20"])";
+  auto time_points = ArrayFromJSON(timestamp(TimeUnit::SECOND, "Etc/GMT+2"), json);
+
+  auto year = ArrayFromJSON(int64(), "[1969, 2000, 3989, 1899, 2033]");

Review comment:
       Hmm... can you explain the reasoning here? If the timestamp is situated in 1970, I would expect the year to be 1970.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org