You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/06/01 22:08:22 UTC

[GitHub] [arrow] jvictorhuguenin opened a new pull request #10432: ARROW-12924: [Gandiva][C++] Implement CONVERT_TIMEZONE SQL function in Gandiva

jvictorhuguenin opened a new pull request #10432:
URL: https://github.com/apache/arrow/pull/10432


   Converts timestamp to specified timezone. If the sourceTimezone parameter is not present, Dremio assumes the timestamp provided in the third parameter is in UTC format. The sourceTimezone and destinationTimezone parameters accept any of the following values: timezone name from sys.timezone_names, timezone abbreviation from sys.timezone_abbrevs, and offset, such as +02:00.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] jvictorhuguenin commented on a change in pull request #10432: ARROW-12924: [Gandiva][C++] Implement CONVERT_TIMEZONE SQL function in Gandiva

Posted by GitBox <gi...@apache.org>.
jvictorhuguenin commented on a change in pull request #10432:
URL: https://github.com/apache/arrow/pull/10432#discussion_r813816522



##########
File path: cpp/src/gandiva/convert_timezone_holder.h
##########
@@ -0,0 +1,282 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "arrow/vendored/datetime/date.h"
+#include "arrow/vendored/datetime/tz.h"
+#include "gandiva/function_holder.h"
+#include "gandiva/node.h"
+#include "gandiva/visibility.h"
+
+using arrow_vendored::date::local_time;
+using arrow_vendored::date::locate_zone;
+using arrow_vendored::date::make_zoned;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::time_zone;
+using std::chrono::milliseconds;
+
+namespace gandiva {
+
+class OffsetZone {
+  std::chrono::seconds offset_;
+
+ public:
+  explicit OffsetZone(std::chrono::seconds offset) : offset_(offset) {}
+
+  template <class Duration>
+  auto to_local(arrow_vendored::date::sys_time<Duration> tp) const {
+    using std::common_type_t;
+    using std::chrono::seconds;
+    using LT = local_time<std::common_type_t<Duration, seconds>>;
+    return LT((tp + offset_).time_since_epoch());
+  }
+
+  template <class Duration>
+  auto to_sys(arrow_vendored::date::local_time<Duration> tp) const {
+    using std::chrono::seconds;
+    using ST = sys_time<std::common_type_t<Duration, seconds>>;
+    return ST((tp - offset_).time_since_epoch());
+  }
+};
+
+/// Function Holder for SQL 'CONVERT_TIMEZONE'
+class GANDIVA_EXPORT ConvertTimezoneHolder : public FunctionHolder {
+ public:
+  ~ConvertTimezoneHolder() override = default;
+
+  static Status Make(const FunctionNode& node,
+                     std::shared_ptr<ConvertTimezoneHolder>* holder);
+
+  static Status Make(const std::string& srcTz, const std::string& destTz,
+                     std::shared_ptr<ConvertTimezoneHolder>* holder);
+
+  /// Return the converted timestamp
+  int64_t convert(const int64_t src_timestamp) {
+    using std::chrono::seconds;
+
+    if (dest_timezone != NULLPTR && src_timezone == NULLPTR) {
+      return dest_timezone
+          ->to_local(src_offset_tz->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else if (dest_timezone == NULLPTR && src_timezone != NULLPTR) {
+      return dest_offset_tz
+          ->to_local(src_timezone->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else if (dest_timezone == NULLPTR && src_timezone == NULLPTR) {
+      return dest_offset_tz
+          ->to_local(src_offset_tz->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else {
+      return dest_timezone
+          ->to_local(src_timezone->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    }
+  }
+
+  // Tracks if the timezones given could be found in IANA Timezone DB.
+  bool ok = true;
+
+ private:
+  explicit ConvertTimezoneHolder(const std::string& srcTz, const std::string& destTz) {
+    auto srcTz_abbrv_offset = abbrv_tz.find(srcTz);
+    auto destTz_abbrv_offset = abbrv_tz.find(destTz);
+
+    try {
+      if (srcTz_abbrv_offset != abbrv_tz.end()) {
+        auto secs = convert_offset_to_seconds(srcTz_abbrv_offset->second);
+        src_offset_tz = std::make_shared<OffsetZone>(secs);
+      } else {
+        if (is_timezone_shift(srcTz) ||
+            is_timezone_offset(const_cast<std::string&>(srcTz))) {

Review comment:
       done




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] jvictorhuguenin commented on a change in pull request #10432: ARROW-12924: [Gandiva][C++] Implement CONVERT_TIMEZONE SQL function in Gandiva

Posted by GitBox <gi...@apache.org>.
jvictorhuguenin commented on a change in pull request #10432:
URL: https://github.com/apache/arrow/pull/10432#discussion_r816178933



##########
File path: cpp/src/gandiva/gdv_function_stubs.cc
##########
@@ -183,6 +184,18 @@ bool gdv_fn_in_expr_lookup_utf8(int64_t ptr, const char* data, int data_len,
   return holder->HasValue(arrow::util::string_view(data, data_len));
 }
 
+int64_t gdv_fn_convert_timezone(int64_t ptr, const char* src_tz, int src_tz_len,
+                                const char* dst_tz, int dst_tz_len, int64_t src_millis,
+                                bool validity) {

Review comment:
       I uncommented the lines below




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] anthonylouisbsb commented on a change in pull request #10432: ARROW-12924: [Gandiva][C++] Implement CONVERT_TIMEZONE SQL function in Gandiva

Posted by GitBox <gi...@apache.org>.
anthonylouisbsb commented on a change in pull request #10432:
URL: https://github.com/apache/arrow/pull/10432#discussion_r809527803



##########
File path: cpp/src/gandiva/convert_timezone_holder.h
##########
@@ -0,0 +1,282 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "arrow/vendored/datetime/date.h"
+#include "arrow/vendored/datetime/tz.h"
+#include "gandiva/function_holder.h"
+#include "gandiva/node.h"
+#include "gandiva/visibility.h"
+
+using arrow_vendored::date::local_time;
+using arrow_vendored::date::locate_zone;
+using arrow_vendored::date::make_zoned;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::time_zone;
+using std::chrono::milliseconds;
+
+namespace gandiva {
+
+class OffsetZone {
+  std::chrono::seconds offset_;
+
+ public:
+  explicit OffsetZone(std::chrono::seconds offset) : offset_(offset) {}
+
+  template <class Duration>
+  auto to_local(arrow_vendored::date::sys_time<Duration> tp) const {
+    using std::common_type_t;
+    using std::chrono::seconds;
+    using LT = local_time<std::common_type_t<Duration, seconds>>;
+    return LT((tp + offset_).time_since_epoch());
+  }
+
+  template <class Duration>
+  auto to_sys(arrow_vendored::date::local_time<Duration> tp) const {
+    using std::chrono::seconds;
+    using ST = sys_time<std::common_type_t<Duration, seconds>>;
+    return ST((tp - offset_).time_since_epoch());
+  }
+};
+
+/// Function Holder for SQL 'CONVERT_TIMEZONE'
+class GANDIVA_EXPORT ConvertTimezoneHolder : public FunctionHolder {
+ public:
+  ~ConvertTimezoneHolder() override = default;
+
+  static Status Make(const FunctionNode& node,
+                     std::shared_ptr<ConvertTimezoneHolder>* holder);
+
+  static Status Make(const std::string& srcTz, const std::string& destTz,
+                     std::shared_ptr<ConvertTimezoneHolder>* holder);
+
+  /// Return the converted timestamp
+  int64_t convert(const int64_t src_timestamp) {
+    using std::chrono::seconds;
+
+    if (dest_timezone != NULLPTR && src_timezone == NULLPTR) {
+      return dest_timezone
+          ->to_local(src_offset_tz->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else if (dest_timezone == NULLPTR && src_timezone != NULLPTR) {
+      return dest_offset_tz
+          ->to_local(src_timezone->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else if (dest_timezone == NULLPTR && src_timezone == NULLPTR) {
+      return dest_offset_tz
+          ->to_local(src_offset_tz->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else {
+      return dest_timezone
+          ->to_local(src_timezone->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    }
+  }
+
+  // Tracks if the timezones given could be found in IANA Timezone DB.
+  bool ok = true;
+
+ private:
+  explicit ConvertTimezoneHolder(const std::string& srcTz, const std::string& destTz) {
+    auto srcTz_abbrv_offset = abbrv_tz.find(srcTz);
+    auto destTz_abbrv_offset = abbrv_tz.find(destTz);
+
+    try {
+      if (srcTz_abbrv_offset != abbrv_tz.end()) {
+        auto secs = convert_offset_to_seconds(srcTz_abbrv_offset->second);
+        src_offset_tz = std::make_shared<OffsetZone>(secs);
+      } else {
+        if (is_timezone_shift(srcTz) ||
+            is_timezone_offset(const_cast<std::string&>(srcTz))) {
+          auto secs = convert_offset_to_seconds(srcTz);
+          src_offset_tz = std::make_shared<OffsetZone>(secs);
+        } else {
+          src_timezone = locate_zone(srcTz);
+        }
+      }
+
+      if (destTz_abbrv_offset != abbrv_tz.end()) {
+        auto secs = convert_offset_to_seconds(destTz_abbrv_offset->second);
+        dest_offset_tz = std::make_shared<OffsetZone>(secs);
+      } else {
+        if (is_timezone_shift(destTz) ||
+            is_timezone_offset(const_cast<std::string&>(destTz))) {
+          auto secs = convert_offset_to_seconds(destTz);
+          dest_offset_tz = std::make_shared<OffsetZone>(secs);
+        } else {
+          dest_timezone = locate_zone(destTz);
+        }
+      }
+    } catch (...) {
+      ok = false;
+    }
+  }
+
+  static bool is_timezone_offset(std::string& offset) {
+    if ((offset.rfind("-") == 0 || offset.rfind("+") == 0) && offset.length() == 6) {
+      return true;
+    }
+    return false;
+  }
+
+  std::chrono::seconds convert_offset_to_seconds(std::string string_offset) {
+    int32_t prefix_offset_length = is_timezone_shift(string_offset);
+    if (prefix_offset_length != 0) {
+      auto abbrv_offset = abbrv_tz.find(string_offset.substr(0, prefix_offset_length));
+      auto abbrv_seconds = off_set_to_seconds(abbrv_offset->second);
+      auto shift_seconds = off_set_to_seconds(string_offset.substr(prefix_offset_length));
+      return abbrv_seconds + shift_seconds;
+    } else {
+      return off_set_to_seconds(string_offset);
+    }
+  }
+
+  static int32_t parse_number(std::string& string_offset, int32_t pos,
+                              bool preceded_by_colon) {
+    if (preceded_by_colon && string_offset[pos - 1] != ':') {
+      throw "Invalid ID for ZoneOffset, colon not found when expected: " + string_offset;
+    }
+    char ch1 = string_offset[pos];
+    char ch2 = string_offset[pos + 1];
+    if (ch1 < '0' || ch1 > '9' || ch2 < '0' || ch2 > '9') {
+      throw "Invalid ID for ZoneOffset, non numeric characters found: " + string_offset;
+    }
+
+    return (ch1 - 48) * 10 + (ch2 - 48);
+  }
+
+  std::chrono::seconds off_set_to_seconds(std::string string_offset) {
+    // parse - +h, +hh, +hhmm, +hh:mm, +hhmmss, +hh:mm:ss
+    int32_t hours, minutes, seconds;
+    std::string sub_minus, sub_plus;
+    switch (string_offset.length()) {
+      case 2:
+        sub_minus = string_offset.substr(string_offset.find("-") + 1);
+        if (sub_minus.length() == 1) {
+          string_offset = "-0" + sub_minus;  // fallthru
+        }
+        sub_plus = string_offset.substr(string_offset.find("+") + 1);
+        if (sub_plus.length() == 1) {
+          string_offset = "+0" + sub_plus;  // fallthru
+        }
+      case 3:
+        hours = parse_number(string_offset, 1, false);
+        minutes = 0;
+        seconds = 0;
+        break;
+      case 5:
+        hours = parse_number(string_offset, 1, false);
+        minutes = parse_number(string_offset, 3, false);
+        seconds = 0;
+        break;
+      case 6:
+        hours = parse_number(string_offset, 1, false);
+        minutes = parse_number(string_offset, 4, true);
+        seconds = 0;
+        break;
+      case 7:
+        hours = parse_number(string_offset, 1, false);
+        minutes = parse_number(string_offset, 3, false);
+        seconds = parse_number(string_offset, 5, false);
+        break;
+      case 9:
+        hours = parse_number(string_offset, 1, false);
+        minutes = parse_number(string_offset, 4, true);
+        seconds = parse_number(string_offset, 7, true);
+        break;
+      default:
+        break;
+    }
+    char first = string_offset.at(0);
+    if (first != '+' && first != '-') {
+      throw "Invalid ID for ZoneOffset, plus/minus not found when expected: " +
+          string_offset;
+    }
+    if (first == '-') {
+      return seconds_from_minutes_hours(-hours, -minutes, -seconds);
+    } else {
+      return seconds_from_minutes_hours(hours, minutes, seconds);
+    }
+  }
+
+  void validate(int32_t hours, int32_t minutes, int32_t seconds) {
+    if (hours < -18 || hours > 18) {
+      throw "Zone offset hours not in valid range";
+    }
+    if (hours > 0) {
+      if (minutes < 0 || seconds < 0) {
+        throw "Zone offset minutes and seconds must be positive because hours is "
+              "positive";
+      }
+    } else if (hours < 0) {
+      if (minutes > 0 || seconds > 0) {
+        throw "Zone offset minutes and seconds must be negative because hours is "
+              "negative";
+      }
+    } else if ((minutes > 0 && seconds < 0) || (minutes < 0 && seconds > 0)) {
+      throw "Zone offset minutes and seconds must have the same sign";
+    }
+    if (minutes < -59 || minutes > 59) {
+      throw "Zone offset minutes not in valid range";
+    }
+    if (seconds < -59 || seconds > 59) {
+      throw "Zone offset seconds not in valid range";
+    }
+    if (abs(hours) == 18 && (minutes | seconds) != 0) {
+      throw "Zone offset not in valid range: -18:00 to +18:00";
+    }
+  }
+
+  std::chrono::seconds seconds_from_minutes_hours(int32_t hours, int32_t minutes,
+                                                  int32_t seconds) {
+    validate(hours, minutes, seconds);
+    int32_t total_seconds = hours * 3600 + minutes * 60 + seconds;
+    return std::chrono::seconds(total_seconds);
+  }
+
+  int32_t is_timezone_shift(const std::string& string_offset) {
+    if (string_offset.rfind("UTC") == 0 || string_offset.rfind("GMT") == 0) {
+      return 3;
+    } else if (string_offset.rfind("UT") == 0) {
+      return 2;
+    }
+    return 0;
+  }
+
+  std::shared_ptr<OffsetZone> src_offset_tz = NULLPTR;
+  std::shared_ptr<OffsetZone> dest_offset_tz = NULLPTR;
+
+  const time_zone* src_timezone = NULLPTR;
+  const time_zone* dest_timezone = NULLPTR;

Review comment:
       Could you use a shared pointer here? Because I think it is dangerous to use plain pointers as they may be not deallocated

##########
File path: cpp/src/gandiva/gdv_function_stubs.h
##########
@@ -120,6 +120,9 @@ const char* gdv_fn_sha1_decimal128(int64_t context, int64_t x_high, uint64_t x_l
                                    int32_t x_precision, int32_t x_scale,
                                    gdv_boolean x_isvalid, int32_t* out_length);
 
+int gdv_fn_utctime_to_zone(int* time_fields, const char* zone, int zone_len,

Review comment:
       I looked for that function inside the gdv_stubs, but I did not find it

##########
File path: cpp/src/gandiva/convert_timezone_holder_test.cc
##########
@@ -0,0 +1,144 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/convert_timezone_holder.h"
+
+#include <gtest/gtest.h>
+#include "gandiva/precompiled/testing.h"
+
+#include <memory>
+#include <vector>
+
+namespace gandiva {
+class TestConvertTimezone : public ::testing::Test {
+ public:
+  FunctionNode BuildConvert(std::string srcTz, std::string dstTz) {
+    auto field = std::make_shared<FieldNode>(arrow::field("times", arrow::int64()));
+    auto srcTz_node =
+        std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(srcTz), false);
+    auto dstTz_node =
+        std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(dstTz), false);
+    return FunctionNode("convert_timezone", {field, srcTz_node, dstTz_node},
+                        arrow::int64());
+  }
+};
+
+TEST_F(TestConvertTimezone, TestConvertTimezoneName) {
+  std::shared_ptr<ConvertTimezoneHolder> convert_holder;
+
+  auto status =
+      ConvertTimezoneHolder::Make("Canada/Pacific", "Asia/Kolkata", &convert_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  EXPECT_EQ(convert_holder->convert(StringToTimestamp("2016-02-01 08:29:00")),
+            StringToTimestamp("2016-02-01 21:59:00"));
+  EXPECT_EQ(convert_holder->convert(StringToTimestamp("2016-10-01 08:29:00")),
+            StringToTimestamp("2016-10-01 20:59:00"));  // Checks if it considers Daylight
+                                                        // saving time periods.
+  EXPECT_EQ(convert_holder->convert(StringToTimestamp("2016-02-28 23:59:59")),
+            StringToTimestamp("2016-02-29 13:29:59"));
+  EXPECT_EQ(convert_holder->convert(StringToTimestamp("2015-02-28 23:59:59")),
+            StringToTimestamp("2015-03-01 13:29:59"));
+  EXPECT_EQ(convert_holder->convert(StringToTimestamp("1969-01-01 08:29:00")),
+            StringToTimestamp("1969-01-01 21:59:00"));
+  EXPECT_EQ(convert_holder->convert(StringToTimestamp("1950-10-01 08:29:00")),
+            StringToTimestamp("1950-10-01 21:59:00"));  // Checks if it considers Daylight
+                                                        // saving time periods.
+}
+
+TEST_F(TestConvertTimezone, TestConvertTimezoneAbbreviations) {

Review comment:
       Add some tests with error cases, like a timezone that does not exist

##########
File path: cpp/src/gandiva/convert_timezone_holder.h
##########
@@ -0,0 +1,282 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "arrow/vendored/datetime/date.h"
+#include "arrow/vendored/datetime/tz.h"
+#include "gandiva/function_holder.h"
+#include "gandiva/node.h"
+#include "gandiva/visibility.h"
+
+using arrow_vendored::date::local_time;
+using arrow_vendored::date::locate_zone;
+using arrow_vendored::date::make_zoned;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::time_zone;
+using std::chrono::milliseconds;
+
+namespace gandiva {
+
+class OffsetZone {
+  std::chrono::seconds offset_;
+
+ public:
+  explicit OffsetZone(std::chrono::seconds offset) : offset_(offset) {}
+
+  template <class Duration>
+  auto to_local(arrow_vendored::date::sys_time<Duration> tp) const {
+    using std::common_type_t;
+    using std::chrono::seconds;
+    using LT = local_time<std::common_type_t<Duration, seconds>>;
+    return LT((tp + offset_).time_since_epoch());
+  }
+
+  template <class Duration>
+  auto to_sys(arrow_vendored::date::local_time<Duration> tp) const {
+    using std::chrono::seconds;
+    using ST = sys_time<std::common_type_t<Duration, seconds>>;
+    return ST((tp - offset_).time_since_epoch());
+  }
+};
+
+/// Function Holder for SQL 'CONVERT_TIMEZONE'
+class GANDIVA_EXPORT ConvertTimezoneHolder : public FunctionHolder {
+ public:
+  ~ConvertTimezoneHolder() override = default;
+
+  static Status Make(const FunctionNode& node,
+                     std::shared_ptr<ConvertTimezoneHolder>* holder);
+
+  static Status Make(const std::string& srcTz, const std::string& destTz,
+                     std::shared_ptr<ConvertTimezoneHolder>* holder);
+
+  /// Return the converted timestamp
+  int64_t convert(const int64_t src_timestamp) {
+    using std::chrono::seconds;
+
+    if (dest_timezone != NULLPTR && src_timezone == NULLPTR) {
+      return dest_timezone
+          ->to_local(src_offset_tz->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else if (dest_timezone == NULLPTR && src_timezone != NULLPTR) {
+      return dest_offset_tz
+          ->to_local(src_timezone->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else if (dest_timezone == NULLPTR && src_timezone == NULLPTR) {
+      return dest_offset_tz
+          ->to_local(src_offset_tz->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else {
+      return dest_timezone
+          ->to_local(src_timezone->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    }
+  }
+
+  // Tracks if the timezones given could be found in IANA Timezone DB.
+  bool ok = true;
+
+ private:
+  explicit ConvertTimezoneHolder(const std::string& srcTz, const std::string& destTz) {
+    auto srcTz_abbrv_offset = abbrv_tz.find(srcTz);
+    auto destTz_abbrv_offset = abbrv_tz.find(destTz);
+
+    try {
+      if (srcTz_abbrv_offset != abbrv_tz.end()) {
+        auto secs = convert_offset_to_seconds(srcTz_abbrv_offset->second);
+        src_offset_tz = std::make_shared<OffsetZone>(secs);
+      } else {
+        if (is_timezone_shift(srcTz) ||
+            is_timezone_offset(const_cast<std::string&>(srcTz))) {

Review comment:
       You can put that condition as an else-if and the other as else

##########
File path: cpp/src/gandiva/gdv_function_stubs.cc
##########
@@ -183,6 +184,18 @@ bool gdv_fn_in_expr_lookup_utf8(int64_t ptr, const char* data, int data_len,
   return holder->HasValue(arrow::util::string_view(data, data_len));
 }
 
+int64_t gdv_fn_convert_timezone(int64_t ptr, const char* src_tz, int src_tz_len,
+                                const char* dst_tz, int dst_tz_len, int64_t src_millis,
+                                bool validity) {

Review comment:
       Why you add that last validity parameter?

##########
File path: cpp/src/gandiva/convert_timezone_holder.h
##########
@@ -0,0 +1,282 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "arrow/vendored/datetime/date.h"
+#include "arrow/vendored/datetime/tz.h"
+#include "gandiva/function_holder.h"
+#include "gandiva/node.h"
+#include "gandiva/visibility.h"
+
+using arrow_vendored::date::local_time;
+using arrow_vendored::date::locate_zone;
+using arrow_vendored::date::make_zoned;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::time_zone;
+using std::chrono::milliseconds;
+
+namespace gandiva {
+
+class OffsetZone {
+  std::chrono::seconds offset_;
+
+ public:
+  explicit OffsetZone(std::chrono::seconds offset) : offset_(offset) {}
+
+  template <class Duration>
+  auto to_local(arrow_vendored::date::sys_time<Duration> tp) const {
+    using std::common_type_t;
+    using std::chrono::seconds;
+    using LT = local_time<std::common_type_t<Duration, seconds>>;

Review comment:
       You add `using std::common_type_t` in the first line, but you repeat the std:: again. Remove the first line or remove the std:: in the last one

##########
File path: cpp/src/gandiva/convert_timezone_holder.h
##########
@@ -0,0 +1,282 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "arrow/vendored/datetime/date.h"
+#include "arrow/vendored/datetime/tz.h"
+#include "gandiva/function_holder.h"
+#include "gandiva/node.h"
+#include "gandiva/visibility.h"
+
+using arrow_vendored::date::local_time;
+using arrow_vendored::date::locate_zone;
+using arrow_vendored::date::make_zoned;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::time_zone;
+using std::chrono::milliseconds;
+
+namespace gandiva {
+
+class OffsetZone {
+  std::chrono::seconds offset_;
+
+ public:
+  explicit OffsetZone(std::chrono::seconds offset) : offset_(offset) {}
+
+  template <class Duration>
+  auto to_local(arrow_vendored::date::sys_time<Duration> tp) const {
+    using std::common_type_t;
+    using std::chrono::seconds;
+    using LT = local_time<std::common_type_t<Duration, seconds>>;
+    return LT((tp + offset_).time_since_epoch());
+  }
+
+  template <class Duration>
+  auto to_sys(arrow_vendored::date::local_time<Duration> tp) const {
+    using std::chrono::seconds;
+    using ST = sys_time<std::common_type_t<Duration, seconds>>;
+    return ST((tp - offset_).time_since_epoch());
+  }
+};
+
+/// Function Holder for SQL 'CONVERT_TIMEZONE'
+class GANDIVA_EXPORT ConvertTimezoneHolder : public FunctionHolder {
+ public:
+  ~ConvertTimezoneHolder() override = default;
+
+  static Status Make(const FunctionNode& node,
+                     std::shared_ptr<ConvertTimezoneHolder>* holder);
+
+  static Status Make(const std::string& srcTz, const std::string& destTz,
+                     std::shared_ptr<ConvertTimezoneHolder>* holder);
+
+  /// Return the converted timestamp
+  int64_t convert(const int64_t src_timestamp) {
+    using std::chrono::seconds;
+
+    if (dest_timezone != NULLPTR && src_timezone == NULLPTR) {
+      return dest_timezone
+          ->to_local(src_offset_tz->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else if (dest_timezone == NULLPTR && src_timezone != NULLPTR) {
+      return dest_offset_tz
+          ->to_local(src_timezone->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else if (dest_timezone == NULLPTR && src_timezone == NULLPTR) {
+      return dest_offset_tz
+          ->to_local(src_offset_tz->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else {
+      return dest_timezone
+          ->to_local(src_timezone->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    }
+  }
+
+  // Tracks if the timezones given could be found in IANA Timezone DB.
+  bool ok = true;

Review comment:
       Use a more descriptive name for the variabke

##########
File path: cpp/src/gandiva/function_holder_registry.h
##########
@@ -64,7 +65,8 @@ class FunctionHolderRegistry {
     static map_type maker_map = {{"like", LAMBDA_MAKER(LikeHolder)},
                                  {"ilike", LAMBDA_MAKER(LikeHolder)},
                                  {"to_date", LAMBDA_MAKER(ToDateHolder)},
-                                 {"random", LAMBDA_MAKER(RandomGeneratorHolder)},
+                                 {"convert_timezone", LAMBDA_MAKER(ConvertTimezoneHolder)},
+        {"random", LAMBDA_MAKER(RandomGeneratorHolder)},

Review comment:
       Check that format style, it is strange




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] jvictorhuguenin commented on a change in pull request #10432: ARROW-12924: [Gandiva][C++] Implement CONVERT_TIMEZONE SQL function in Gandiva

Posted by GitBox <gi...@apache.org>.
jvictorhuguenin commented on a change in pull request #10432:
URL: https://github.com/apache/arrow/pull/10432#discussion_r813813893



##########
File path: cpp/src/gandiva/convert_timezone_holder.h
##########
@@ -0,0 +1,282 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "arrow/vendored/datetime/date.h"
+#include "arrow/vendored/datetime/tz.h"
+#include "gandiva/function_holder.h"
+#include "gandiva/node.h"
+#include "gandiva/visibility.h"
+
+using arrow_vendored::date::local_time;
+using arrow_vendored::date::locate_zone;
+using arrow_vendored::date::make_zoned;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::time_zone;
+using std::chrono::milliseconds;
+
+namespace gandiva {
+
+class OffsetZone {
+  std::chrono::seconds offset_;
+
+ public:
+  explicit OffsetZone(std::chrono::seconds offset) : offset_(offset) {}
+
+  template <class Duration>
+  auto to_local(arrow_vendored::date::sys_time<Duration> tp) const {
+    using std::common_type_t;
+    using std::chrono::seconds;
+    using LT = local_time<std::common_type_t<Duration, seconds>>;
+    return LT((tp + offset_).time_since_epoch());
+  }
+
+  template <class Duration>
+  auto to_sys(arrow_vendored::date::local_time<Duration> tp) const {
+    using std::chrono::seconds;
+    using ST = sys_time<std::common_type_t<Duration, seconds>>;
+    return ST((tp - offset_).time_since_epoch());
+  }
+};
+
+/// Function Holder for SQL 'CONVERT_TIMEZONE'
+class GANDIVA_EXPORT ConvertTimezoneHolder : public FunctionHolder {
+ public:
+  ~ConvertTimezoneHolder() override = default;
+
+  static Status Make(const FunctionNode& node,
+                     std::shared_ptr<ConvertTimezoneHolder>* holder);
+
+  static Status Make(const std::string& srcTz, const std::string& destTz,
+                     std::shared_ptr<ConvertTimezoneHolder>* holder);
+
+  /// Return the converted timestamp
+  int64_t convert(const int64_t src_timestamp) {
+    using std::chrono::seconds;
+
+    if (dest_timezone != NULLPTR && src_timezone == NULLPTR) {
+      return dest_timezone
+          ->to_local(src_offset_tz->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else if (dest_timezone == NULLPTR && src_timezone != NULLPTR) {
+      return dest_offset_tz
+          ->to_local(src_timezone->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else if (dest_timezone == NULLPTR && src_timezone == NULLPTR) {
+      return dest_offset_tz
+          ->to_local(src_offset_tz->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else {
+      return dest_timezone
+          ->to_local(src_timezone->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    }
+  }
+
+  // Tracks if the timezones given could be found in IANA Timezone DB.
+  bool ok = true;
+
+ private:
+  explicit ConvertTimezoneHolder(const std::string& srcTz, const std::string& destTz) {
+    auto srcTz_abbrv_offset = abbrv_tz.find(srcTz);
+    auto destTz_abbrv_offset = abbrv_tz.find(destTz);
+
+    try {
+      if (srcTz_abbrv_offset != abbrv_tz.end()) {
+        auto secs = convert_offset_to_seconds(srcTz_abbrv_offset->second);
+        src_offset_tz = std::make_shared<OffsetZone>(secs);
+      } else {
+        if (is_timezone_shift(srcTz) ||
+            is_timezone_offset(const_cast<std::string&>(srcTz))) {
+          auto secs = convert_offset_to_seconds(srcTz);
+          src_offset_tz = std::make_shared<OffsetZone>(secs);
+        } else {
+          src_timezone = locate_zone(srcTz);
+        }
+      }
+
+      if (destTz_abbrv_offset != abbrv_tz.end()) {
+        auto secs = convert_offset_to_seconds(destTz_abbrv_offset->second);
+        dest_offset_tz = std::make_shared<OffsetZone>(secs);
+      } else {
+        if (is_timezone_shift(destTz) ||
+            is_timezone_offset(const_cast<std::string&>(destTz))) {
+          auto secs = convert_offset_to_seconds(destTz);
+          dest_offset_tz = std::make_shared<OffsetZone>(secs);
+        } else {
+          dest_timezone = locate_zone(destTz);
+        }
+      }
+    } catch (...) {
+      ok = false;
+    }
+  }
+
+  static bool is_timezone_offset(std::string& offset) {
+    if ((offset.rfind("-") == 0 || offset.rfind("+") == 0) && offset.length() == 6) {
+      return true;
+    }
+    return false;
+  }
+
+  std::chrono::seconds convert_offset_to_seconds(std::string string_offset) {
+    int32_t prefix_offset_length = is_timezone_shift(string_offset);
+    if (prefix_offset_length != 0) {
+      auto abbrv_offset = abbrv_tz.find(string_offset.substr(0, prefix_offset_length));
+      auto abbrv_seconds = off_set_to_seconds(abbrv_offset->second);
+      auto shift_seconds = off_set_to_seconds(string_offset.substr(prefix_offset_length));
+      return abbrv_seconds + shift_seconds;
+    } else {
+      return off_set_to_seconds(string_offset);
+    }
+  }
+
+  static int32_t parse_number(std::string& string_offset, int32_t pos,
+                              bool preceded_by_colon) {
+    if (preceded_by_colon && string_offset[pos - 1] != ':') {
+      throw "Invalid ID for ZoneOffset, colon not found when expected: " + string_offset;
+    }
+    char ch1 = string_offset[pos];
+    char ch2 = string_offset[pos + 1];
+    if (ch1 < '0' || ch1 > '9' || ch2 < '0' || ch2 > '9') {
+      throw "Invalid ID for ZoneOffset, non numeric characters found: " + string_offset;
+    }
+
+    return (ch1 - 48) * 10 + (ch2 - 48);
+  }
+
+  std::chrono::seconds off_set_to_seconds(std::string string_offset) {
+    // parse - +h, +hh, +hhmm, +hh:mm, +hhmmss, +hh:mm:ss
+    int32_t hours, minutes, seconds;
+    std::string sub_minus, sub_plus;
+    switch (string_offset.length()) {
+      case 2:
+        sub_minus = string_offset.substr(string_offset.find("-") + 1);
+        if (sub_minus.length() == 1) {
+          string_offset = "-0" + sub_minus;  // fallthru
+        }
+        sub_plus = string_offset.substr(string_offset.find("+") + 1);
+        if (sub_plus.length() == 1) {
+          string_offset = "+0" + sub_plus;  // fallthru
+        }
+      case 3:
+        hours = parse_number(string_offset, 1, false);
+        minutes = 0;
+        seconds = 0;
+        break;
+      case 5:
+        hours = parse_number(string_offset, 1, false);
+        minutes = parse_number(string_offset, 3, false);
+        seconds = 0;
+        break;
+      case 6:
+        hours = parse_number(string_offset, 1, false);
+        minutes = parse_number(string_offset, 4, true);
+        seconds = 0;
+        break;
+      case 7:
+        hours = parse_number(string_offset, 1, false);
+        minutes = parse_number(string_offset, 3, false);
+        seconds = parse_number(string_offset, 5, false);
+        break;
+      case 9:
+        hours = parse_number(string_offset, 1, false);
+        minutes = parse_number(string_offset, 4, true);
+        seconds = parse_number(string_offset, 7, true);
+        break;
+      default:
+        break;
+    }
+    char first = string_offset.at(0);
+    if (first != '+' && first != '-') {
+      throw "Invalid ID for ZoneOffset, plus/minus not found when expected: " +
+          string_offset;
+    }
+    if (first == '-') {
+      return seconds_from_minutes_hours(-hours, -minutes, -seconds);
+    } else {
+      return seconds_from_minutes_hours(hours, minutes, seconds);
+    }
+  }
+
+  void validate(int32_t hours, int32_t minutes, int32_t seconds) {
+    if (hours < -18 || hours > 18) {
+      throw "Zone offset hours not in valid range";
+    }
+    if (hours > 0) {
+      if (minutes < 0 || seconds < 0) {
+        throw "Zone offset minutes and seconds must be positive because hours is "
+              "positive";
+      }
+    } else if (hours < 0) {
+      if (minutes > 0 || seconds > 0) {
+        throw "Zone offset minutes and seconds must be negative because hours is "
+              "negative";
+      }
+    } else if ((minutes > 0 && seconds < 0) || (minutes < 0 && seconds > 0)) {
+      throw "Zone offset minutes and seconds must have the same sign";
+    }
+    if (minutes < -59 || minutes > 59) {
+      throw "Zone offset minutes not in valid range";
+    }
+    if (seconds < -59 || seconds > 59) {
+      throw "Zone offset seconds not in valid range";
+    }
+    if (abs(hours) == 18 && (minutes | seconds) != 0) {
+      throw "Zone offset not in valid range: -18:00 to +18:00";
+    }
+  }
+
+  std::chrono::seconds seconds_from_minutes_hours(int32_t hours, int32_t minutes,
+                                                  int32_t seconds) {
+    validate(hours, minutes, seconds);
+    int32_t total_seconds = hours * 3600 + minutes * 60 + seconds;
+    return std::chrono::seconds(total_seconds);
+  }
+
+  int32_t is_timezone_shift(const std::string& string_offset) {
+    if (string_offset.rfind("UTC") == 0 || string_offset.rfind("GMT") == 0) {
+      return 3;
+    } else if (string_offset.rfind("UT") == 0) {
+      return 2;
+    }
+    return 0;
+  }
+
+  std::shared_ptr<OffsetZone> src_offset_tz = NULLPTR;
+  std::shared_ptr<OffsetZone> dest_offset_tz = NULLPTR;
+
+  const time_zone* src_timezone = NULLPTR;
+  const time_zone* dest_timezone = NULLPTR;

Review comment:
       It's not possible because of the class used




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] jvictorhuguenin commented on a change in pull request #10432: ARROW-12924: [Gandiva][C++] Implement CONVERT_TIMEZONE SQL function in Gandiva

Posted by GitBox <gi...@apache.org>.
jvictorhuguenin commented on a change in pull request #10432:
URL: https://github.com/apache/arrow/pull/10432#discussion_r816173076



##########
File path: cpp/src/gandiva/function_holder_registry.h
##########
@@ -64,7 +65,8 @@ class FunctionHolderRegistry {
     static map_type maker_map = {{"like", LAMBDA_MAKER(LikeHolder)},
                                  {"ilike", LAMBDA_MAKER(LikeHolder)},
                                  {"to_date", LAMBDA_MAKER(ToDateHolder)},
-                                 {"random", LAMBDA_MAKER(RandomGeneratorHolder)},
+                                 {"convert_timezone", LAMBDA_MAKER(ConvertTimezoneHolder)},
+        {"random", LAMBDA_MAKER(RandomGeneratorHolder)},

Review comment:
       done

##########
File path: cpp/src/gandiva/convert_timezone_holder_test.cc
##########
@@ -0,0 +1,144 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/convert_timezone_holder.h"
+
+#include <gtest/gtest.h>
+#include "gandiva/precompiled/testing.h"
+
+#include <memory>
+#include <vector>
+
+namespace gandiva {
+class TestConvertTimezone : public ::testing::Test {
+ public:
+  FunctionNode BuildConvert(std::string srcTz, std::string dstTz) {
+    auto field = std::make_shared<FieldNode>(arrow::field("times", arrow::int64()));
+    auto srcTz_node =
+        std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(srcTz), false);
+    auto dstTz_node =
+        std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(dstTz), false);
+    return FunctionNode("convert_timezone", {field, srcTz_node, dstTz_node},
+                        arrow::int64());
+  }
+};
+
+TEST_F(TestConvertTimezone, TestConvertTimezoneName) {
+  std::shared_ptr<ConvertTimezoneHolder> convert_holder;
+
+  auto status =
+      ConvertTimezoneHolder::Make("Canada/Pacific", "Asia/Kolkata", &convert_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  EXPECT_EQ(convert_holder->convert(StringToTimestamp("2016-02-01 08:29:00")),
+            StringToTimestamp("2016-02-01 21:59:00"));
+  EXPECT_EQ(convert_holder->convert(StringToTimestamp("2016-10-01 08:29:00")),
+            StringToTimestamp("2016-10-01 20:59:00"));  // Checks if it considers Daylight
+                                                        // saving time periods.
+  EXPECT_EQ(convert_holder->convert(StringToTimestamp("2016-02-28 23:59:59")),
+            StringToTimestamp("2016-02-29 13:29:59"));
+  EXPECT_EQ(convert_holder->convert(StringToTimestamp("2015-02-28 23:59:59")),
+            StringToTimestamp("2015-03-01 13:29:59"));
+  EXPECT_EQ(convert_holder->convert(StringToTimestamp("1969-01-01 08:29:00")),
+            StringToTimestamp("1969-01-01 21:59:00"));
+  EXPECT_EQ(convert_holder->convert(StringToTimestamp("1950-10-01 08:29:00")),
+            StringToTimestamp("1950-10-01 21:59:00"));  // Checks if it considers Daylight
+                                                        // saving time periods.
+}
+
+TEST_F(TestConvertTimezone, TestConvertTimezoneAbbreviations) {

Review comment:
       done
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] jvictorhuguenin commented on a change in pull request #10432: ARROW-12924: [Gandiva][C++] Implement CONVERT_TIMEZONE SQL function in Gandiva

Posted by GitBox <gi...@apache.org>.
jvictorhuguenin commented on a change in pull request #10432:
URL: https://github.com/apache/arrow/pull/10432#discussion_r813812685



##########
File path: cpp/src/gandiva/gdv_function_stubs.h
##########
@@ -120,6 +120,9 @@ const char* gdv_fn_sha1_decimal128(int64_t context, int64_t x_high, uint64_t x_l
                                    int32_t x_precision, int32_t x_scale,
                                    gdv_boolean x_isvalid, int32_t* out_length);
 
+int gdv_fn_utctime_to_zone(int* time_fields, const char* zone, int zone_len,

Review comment:
       removed it




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] github-actions[bot] commented on pull request #10432: ARROW-12924: [Gandiva][C++] Implement CONVERT_TIMEZONE SQL function in Gandiva

Posted by GitBox <gi...@apache.org>.
github-actions[bot] commented on pull request #10432:
URL: https://github.com/apache/arrow/pull/10432#issuecomment-852494029


   https://issues.apache.org/jira/browse/ARROW-12924


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] github-actions[bot] commented on pull request #10432: ARROW-12924: [Gandiva][C++] Implement CONVERT_TIMEZONE SQL function in Gandiva

Posted by GitBox <gi...@apache.org>.
github-actions[bot] commented on pull request #10432:
URL: https://github.com/apache/arrow/pull/10432#issuecomment-978140366


   :warning: Ticket **has not been started in JIRA**, please click 'Start Progress'.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] jvictorhuguenin commented on a change in pull request #10432: ARROW-12924: [Gandiva][C++] Implement CONVERT_TIMEZONE SQL function in Gandiva

Posted by GitBox <gi...@apache.org>.
jvictorhuguenin commented on a change in pull request #10432:
URL: https://github.com/apache/arrow/pull/10432#discussion_r809545369



##########
File path: cpp/src/gandiva/convert_timezone_holder.h
##########
@@ -0,0 +1,282 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "arrow/vendored/datetime/date.h"
+#include "arrow/vendored/datetime/tz.h"
+#include "gandiva/function_holder.h"
+#include "gandiva/node.h"
+#include "gandiva/visibility.h"
+
+using arrow_vendored::date::local_time;
+using arrow_vendored::date::locate_zone;
+using arrow_vendored::date::make_zoned;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::time_zone;
+using std::chrono::milliseconds;
+
+namespace gandiva {
+
+class OffsetZone {
+  std::chrono::seconds offset_;
+
+ public:
+  explicit OffsetZone(std::chrono::seconds offset) : offset_(offset) {}
+
+  template <class Duration>
+  auto to_local(arrow_vendored::date::sys_time<Duration> tp) const {
+    using std::common_type_t;
+    using std::chrono::seconds;
+    using LT = local_time<std::common_type_t<Duration, seconds>>;

Review comment:
       done




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] jvictorhuguenin commented on a change in pull request #10432: ARROW-12924: [Gandiva][C++] Implement CONVERT_TIMEZONE SQL function in Gandiva

Posted by GitBox <gi...@apache.org>.
jvictorhuguenin commented on a change in pull request #10432:
URL: https://github.com/apache/arrow/pull/10432#discussion_r813813893



##########
File path: cpp/src/gandiva/convert_timezone_holder.h
##########
@@ -0,0 +1,282 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "arrow/vendored/datetime/date.h"
+#include "arrow/vendored/datetime/tz.h"
+#include "gandiva/function_holder.h"
+#include "gandiva/node.h"
+#include "gandiva/visibility.h"
+
+using arrow_vendored::date::local_time;
+using arrow_vendored::date::locate_zone;
+using arrow_vendored::date::make_zoned;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::time_zone;
+using std::chrono::milliseconds;
+
+namespace gandiva {
+
+class OffsetZone {
+  std::chrono::seconds offset_;
+
+ public:
+  explicit OffsetZone(std::chrono::seconds offset) : offset_(offset) {}
+
+  template <class Duration>
+  auto to_local(arrow_vendored::date::sys_time<Duration> tp) const {
+    using std::common_type_t;
+    using std::chrono::seconds;
+    using LT = local_time<std::common_type_t<Duration, seconds>>;
+    return LT((tp + offset_).time_since_epoch());
+  }
+
+  template <class Duration>
+  auto to_sys(arrow_vendored::date::local_time<Duration> tp) const {
+    using std::chrono::seconds;
+    using ST = sys_time<std::common_type_t<Duration, seconds>>;
+    return ST((tp - offset_).time_since_epoch());
+  }
+};
+
+/// Function Holder for SQL 'CONVERT_TIMEZONE'
+class GANDIVA_EXPORT ConvertTimezoneHolder : public FunctionHolder {
+ public:
+  ~ConvertTimezoneHolder() override = default;
+
+  static Status Make(const FunctionNode& node,
+                     std::shared_ptr<ConvertTimezoneHolder>* holder);
+
+  static Status Make(const std::string& srcTz, const std::string& destTz,
+                     std::shared_ptr<ConvertTimezoneHolder>* holder);
+
+  /// Return the converted timestamp
+  int64_t convert(const int64_t src_timestamp) {
+    using std::chrono::seconds;
+
+    if (dest_timezone != NULLPTR && src_timezone == NULLPTR) {
+      return dest_timezone
+          ->to_local(src_offset_tz->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else if (dest_timezone == NULLPTR && src_timezone != NULLPTR) {
+      return dest_offset_tz
+          ->to_local(src_timezone->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else if (dest_timezone == NULLPTR && src_timezone == NULLPTR) {
+      return dest_offset_tz
+          ->to_local(src_offset_tz->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else {
+      return dest_timezone
+          ->to_local(src_timezone->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    }
+  }
+
+  // Tracks if the timezones given could be found in IANA Timezone DB.
+  bool ok = true;
+
+ private:
+  explicit ConvertTimezoneHolder(const std::string& srcTz, const std::string& destTz) {
+    auto srcTz_abbrv_offset = abbrv_tz.find(srcTz);
+    auto destTz_abbrv_offset = abbrv_tz.find(destTz);
+
+    try {
+      if (srcTz_abbrv_offset != abbrv_tz.end()) {
+        auto secs = convert_offset_to_seconds(srcTz_abbrv_offset->second);
+        src_offset_tz = std::make_shared<OffsetZone>(secs);
+      } else {
+        if (is_timezone_shift(srcTz) ||
+            is_timezone_offset(const_cast<std::string&>(srcTz))) {
+          auto secs = convert_offset_to_seconds(srcTz);
+          src_offset_tz = std::make_shared<OffsetZone>(secs);
+        } else {
+          src_timezone = locate_zone(srcTz);
+        }
+      }
+
+      if (destTz_abbrv_offset != abbrv_tz.end()) {
+        auto secs = convert_offset_to_seconds(destTz_abbrv_offset->second);
+        dest_offset_tz = std::make_shared<OffsetZone>(secs);
+      } else {
+        if (is_timezone_shift(destTz) ||
+            is_timezone_offset(const_cast<std::string&>(destTz))) {
+          auto secs = convert_offset_to_seconds(destTz);
+          dest_offset_tz = std::make_shared<OffsetZone>(secs);
+        } else {
+          dest_timezone = locate_zone(destTz);
+        }
+      }
+    } catch (...) {
+      ok = false;
+    }
+  }
+
+  static bool is_timezone_offset(std::string& offset) {
+    if ((offset.rfind("-") == 0 || offset.rfind("+") == 0) && offset.length() == 6) {
+      return true;
+    }
+    return false;
+  }
+
+  std::chrono::seconds convert_offset_to_seconds(std::string string_offset) {
+    int32_t prefix_offset_length = is_timezone_shift(string_offset);
+    if (prefix_offset_length != 0) {
+      auto abbrv_offset = abbrv_tz.find(string_offset.substr(0, prefix_offset_length));
+      auto abbrv_seconds = off_set_to_seconds(abbrv_offset->second);
+      auto shift_seconds = off_set_to_seconds(string_offset.substr(prefix_offset_length));
+      return abbrv_seconds + shift_seconds;
+    } else {
+      return off_set_to_seconds(string_offset);
+    }
+  }
+
+  static int32_t parse_number(std::string& string_offset, int32_t pos,
+                              bool preceded_by_colon) {
+    if (preceded_by_colon && string_offset[pos - 1] != ':') {
+      throw "Invalid ID for ZoneOffset, colon not found when expected: " + string_offset;
+    }
+    char ch1 = string_offset[pos];
+    char ch2 = string_offset[pos + 1];
+    if (ch1 < '0' || ch1 > '9' || ch2 < '0' || ch2 > '9') {
+      throw "Invalid ID for ZoneOffset, non numeric characters found: " + string_offset;
+    }
+
+    return (ch1 - 48) * 10 + (ch2 - 48);
+  }
+
+  std::chrono::seconds off_set_to_seconds(std::string string_offset) {
+    // parse - +h, +hh, +hhmm, +hh:mm, +hhmmss, +hh:mm:ss
+    int32_t hours, minutes, seconds;
+    std::string sub_minus, sub_plus;
+    switch (string_offset.length()) {
+      case 2:
+        sub_minus = string_offset.substr(string_offset.find("-") + 1);
+        if (sub_minus.length() == 1) {
+          string_offset = "-0" + sub_minus;  // fallthru
+        }
+        sub_plus = string_offset.substr(string_offset.find("+") + 1);
+        if (sub_plus.length() == 1) {
+          string_offset = "+0" + sub_plus;  // fallthru
+        }
+      case 3:
+        hours = parse_number(string_offset, 1, false);
+        minutes = 0;
+        seconds = 0;
+        break;
+      case 5:
+        hours = parse_number(string_offset, 1, false);
+        minutes = parse_number(string_offset, 3, false);
+        seconds = 0;
+        break;
+      case 6:
+        hours = parse_number(string_offset, 1, false);
+        minutes = parse_number(string_offset, 4, true);
+        seconds = 0;
+        break;
+      case 7:
+        hours = parse_number(string_offset, 1, false);
+        minutes = parse_number(string_offset, 3, false);
+        seconds = parse_number(string_offset, 5, false);
+        break;
+      case 9:
+        hours = parse_number(string_offset, 1, false);
+        minutes = parse_number(string_offset, 4, true);
+        seconds = parse_number(string_offset, 7, true);
+        break;
+      default:
+        break;
+    }
+    char first = string_offset.at(0);
+    if (first != '+' && first != '-') {
+      throw "Invalid ID for ZoneOffset, plus/minus not found when expected: " +
+          string_offset;
+    }
+    if (first == '-') {
+      return seconds_from_minutes_hours(-hours, -minutes, -seconds);
+    } else {
+      return seconds_from_minutes_hours(hours, minutes, seconds);
+    }
+  }
+
+  void validate(int32_t hours, int32_t minutes, int32_t seconds) {
+    if (hours < -18 || hours > 18) {
+      throw "Zone offset hours not in valid range";
+    }
+    if (hours > 0) {
+      if (minutes < 0 || seconds < 0) {
+        throw "Zone offset minutes and seconds must be positive because hours is "
+              "positive";
+      }
+    } else if (hours < 0) {
+      if (minutes > 0 || seconds > 0) {
+        throw "Zone offset minutes and seconds must be negative because hours is "
+              "negative";
+      }
+    } else if ((minutes > 0 && seconds < 0) || (minutes < 0 && seconds > 0)) {
+      throw "Zone offset minutes and seconds must have the same sign";
+    }
+    if (minutes < -59 || minutes > 59) {
+      throw "Zone offset minutes not in valid range";
+    }
+    if (seconds < -59 || seconds > 59) {
+      throw "Zone offset seconds not in valid range";
+    }
+    if (abs(hours) == 18 && (minutes | seconds) != 0) {
+      throw "Zone offset not in valid range: -18:00 to +18:00";
+    }
+  }
+
+  std::chrono::seconds seconds_from_minutes_hours(int32_t hours, int32_t minutes,
+                                                  int32_t seconds) {
+    validate(hours, minutes, seconds);
+    int32_t total_seconds = hours * 3600 + minutes * 60 + seconds;
+    return std::chrono::seconds(total_seconds);
+  }
+
+  int32_t is_timezone_shift(const std::string& string_offset) {
+    if (string_offset.rfind("UTC") == 0 || string_offset.rfind("GMT") == 0) {
+      return 3;
+    } else if (string_offset.rfind("UT") == 0) {
+      return 2;
+    }
+    return 0;
+  }
+
+  std::shared_ptr<OffsetZone> src_offset_tz = NULLPTR;
+  std::shared_ptr<OffsetZone> dest_offset_tz = NULLPTR;
+
+  const time_zone* src_timezone = NULLPTR;
+  const time_zone* dest_timezone = NULLPTR;

Review comment:
       It's not possible because of the function used to create the timezone object

##########
File path: cpp/src/gandiva/convert_timezone_holder.h
##########
@@ -0,0 +1,282 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "arrow/vendored/datetime/date.h"
+#include "arrow/vendored/datetime/tz.h"
+#include "gandiva/function_holder.h"
+#include "gandiva/node.h"
+#include "gandiva/visibility.h"
+
+using arrow_vendored::date::local_time;
+using arrow_vendored::date::locate_zone;
+using arrow_vendored::date::make_zoned;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::time_zone;
+using std::chrono::milliseconds;
+
+namespace gandiva {
+
+class OffsetZone {
+  std::chrono::seconds offset_;
+
+ public:
+  explicit OffsetZone(std::chrono::seconds offset) : offset_(offset) {}
+
+  template <class Duration>
+  auto to_local(arrow_vendored::date::sys_time<Duration> tp) const {
+    using std::common_type_t;
+    using std::chrono::seconds;
+    using LT = local_time<std::common_type_t<Duration, seconds>>;
+    return LT((tp + offset_).time_since_epoch());
+  }
+
+  template <class Duration>
+  auto to_sys(arrow_vendored::date::local_time<Duration> tp) const {
+    using std::chrono::seconds;
+    using ST = sys_time<std::common_type_t<Duration, seconds>>;
+    return ST((tp - offset_).time_since_epoch());
+  }
+};
+
+/// Function Holder for SQL 'CONVERT_TIMEZONE'
+class GANDIVA_EXPORT ConvertTimezoneHolder : public FunctionHolder {
+ public:
+  ~ConvertTimezoneHolder() override = default;
+
+  static Status Make(const FunctionNode& node,
+                     std::shared_ptr<ConvertTimezoneHolder>* holder);
+
+  static Status Make(const std::string& srcTz, const std::string& destTz,
+                     std::shared_ptr<ConvertTimezoneHolder>* holder);
+
+  /// Return the converted timestamp
+  int64_t convert(const int64_t src_timestamp) {
+    using std::chrono::seconds;
+
+    if (dest_timezone != NULLPTR && src_timezone == NULLPTR) {
+      return dest_timezone
+          ->to_local(src_offset_tz->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else if (dest_timezone == NULLPTR && src_timezone != NULLPTR) {
+      return dest_offset_tz
+          ->to_local(src_timezone->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else if (dest_timezone == NULLPTR && src_timezone == NULLPTR) {
+      return dest_offset_tz
+          ->to_local(src_offset_tz->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    } else {
+      return dest_timezone
+          ->to_local(src_timezone->to_sys<milliseconds>(
+              local_time<milliseconds>(milliseconds(src_timestamp))))
+          .time_since_epoch()
+          .count();
+    }
+  }
+
+  // Tracks if the timezones given could be found in IANA Timezone DB.
+  bool ok = true;

Review comment:
       done




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org