You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/12/11 14:38:36 UTC

[GitHub] [arrow] rkavanap commented on a change in pull request #10402: ARROW-12880: [C++][Gandiva] Add castTIME(int32), castTIMESTAMP(int64) and castTIME(utf8) functions

rkavanap commented on a change in pull request #10402:
URL: https://github.com/apache/arrow/pull/10402#discussion_r767160244



##########
File path: cpp/src/gandiva/precompiled/time.cc
##########
@@ -764,6 +764,82 @@ gdv_date64 castDATE_timestamp(gdv_timestamp timestamp_in_millis) {
   return tp.ClearTimeOfDay().MillisSinceEpoch();
 }
 
+/*
+ * Input consists of mandatory and optional fields.
+ * Mandatory fields are hours, minutes.
+ * The seconds and subseconds are optional.
+ * Format is hours:minutes[:seconds.millis]
+ */
+gdv_time32 castTIME_utf8(int64_t context, const char* input, int32_t length) {
+  using gandiva::TimeFields;
+  using std::chrono::hours;
+  using std::chrono::milliseconds;
+  using std::chrono::minutes;
+  using std::chrono::seconds;
+
+  int32_t time_fields[4] = {0, 0, 0, 0};
+  int32_t sub_seconds_len = 0;
+  int32_t time_field_idx = TimeFields::kHours, index = 0, value = 0;
+  while (time_field_idx < TimeFields::kDisplacementHours && index < length) {
+    if (isdigit(input[index])) {
+      value = (value * 10) + (input[index] - '0');
+
+      if (time_field_idx == TimeFields::kSubSeconds) {
+        sub_seconds_len++;
+      }
+    } else {
+      time_fields[time_field_idx - TimeFields::kHours] = value;
+      value = 0;
+
+      switch (input[index]) {
+        case '.':
+        case ':':
+          time_field_idx++;
+          break;
+        default:
+          break;
+      }
+    }
+
+    index++;
+  }
+
+  // Check if the hours and minutes were defined and store the last value
+  if (time_field_idx < TimeFields::kDisplacementHours) {
+    time_fields[time_field_idx - TimeFields::kHours] = value;
+  }
+
+  // adjust the milliseconds
+  if (sub_seconds_len > 0) {
+    if (sub_seconds_len > 3) {
+      const char* msg = "Invalid millis for time value ";
+      set_error_for_date(length, input, msg, context);
+      return 0;
+    }
+
+    while (sub_seconds_len < 3) {
+      time_fields[TimeFields::kSubSeconds - TimeFields::kHours] *= 10;
+      sub_seconds_len++;
+    }
+  }
+
+  int32_t input_hours = time_fields[0];

Review comment:
       should this be kHours instead of 0, just to be consistent

##########
File path: cpp/src/gandiva/precompiled/time.cc
##########
@@ -764,6 +764,82 @@ gdv_date64 castDATE_timestamp(gdv_timestamp timestamp_in_millis) {
   return tp.ClearTimeOfDay().MillisSinceEpoch();
 }
 
+/*
+ * Input consists of mandatory and optional fields.
+ * Mandatory fields are hours, minutes.
+ * The seconds and subseconds are optional.
+ * Format is hours:minutes[:seconds.millis]
+ */
+gdv_time32 castTIME_utf8(int64_t context, const char* input, int32_t length) {
+  using gandiva::TimeFields;
+  using std::chrono::hours;
+  using std::chrono::milliseconds;
+  using std::chrono::minutes;
+  using std::chrono::seconds;
+
+  int32_t time_fields[4] = {0, 0, 0, 0};
+  int32_t sub_seconds_len = 0;
+  int32_t time_field_idx = TimeFields::kHours, index = 0, value = 0;
+  while (time_field_idx < TimeFields::kDisplacementHours && index < length) {
+    if (isdigit(input[index])) {
+      value = (value * 10) + (input[index] - '0');
+
+      if (time_field_idx == TimeFields::kSubSeconds) {
+        sub_seconds_len++;
+      }
+    } else {
+      time_fields[time_field_idx - TimeFields::kHours] = value;
+      value = 0;
+
+      switch (input[index]) {
+        case '.':

Review comment:
       don't we have to be accurate?,, e.g what if someone specifies hours.minutes.seconds  instead of hours:minutes:seconds..shouldn't we treat this as an error?

##########
File path: cpp/src/gandiva/precompiled/time.cc
##########
@@ -764,6 +764,82 @@ gdv_date64 castDATE_timestamp(gdv_timestamp timestamp_in_millis) {
   return tp.ClearTimeOfDay().MillisSinceEpoch();
 }
 
+/*
+ * Input consists of mandatory and optional fields.
+ * Mandatory fields are hours, minutes.
+ * The seconds and subseconds are optional.
+ * Format is hours:minutes[:seconds.millis]
+ */
+gdv_time32 castTIME_utf8(int64_t context, const char* input, int32_t length) {
+  using gandiva::TimeFields;
+  using std::chrono::hours;
+  using std::chrono::milliseconds;
+  using std::chrono::minutes;
+  using std::chrono::seconds;
+
+  int32_t time_fields[4] = {0, 0, 0, 0};

Review comment:
       Isn't 4 a hardcoded number..rest of places you are using constants such as kDisplacementHours etc..

##########
File path: cpp/src/gandiva/precompiled/time.cc
##########
@@ -764,6 +764,82 @@ gdv_date64 castDATE_timestamp(gdv_timestamp timestamp_in_millis) {
   return tp.ClearTimeOfDay().MillisSinceEpoch();
 }
 
+/*
+ * Input consists of mandatory and optional fields.
+ * Mandatory fields are hours, minutes.
+ * The seconds and subseconds are optional.
+ * Format is hours:minutes[:seconds.millis]
+ */
+gdv_time32 castTIME_utf8(int64_t context, const char* input, int32_t length) {
+  using gandiva::TimeFields;
+  using std::chrono::hours;
+  using std::chrono::milliseconds;
+  using std::chrono::minutes;
+  using std::chrono::seconds;
+
+  int32_t time_fields[4] = {0, 0, 0, 0};
+  int32_t sub_seconds_len = 0;
+  int32_t time_field_idx = TimeFields::kHours, index = 0, value = 0;
+  while (time_field_idx < TimeFields::kDisplacementHours && index < length) {
+    if (isdigit(input[index])) {
+      value = (value * 10) + (input[index] - '0');
+
+      if (time_field_idx == TimeFields::kSubSeconds) {
+        sub_seconds_len++;
+      }
+    } else {
+      time_fields[time_field_idx - TimeFields::kHours] = value;
+      value = 0;
+
+      switch (input[index]) {
+        case '.':
+        case ':':
+          time_field_idx++;
+          break;
+        default:

Review comment:
       shouldn't any other character result in an error, otherwise if I specify '11HHSB2', it may look legal, no?

##########
File path: cpp/src/gandiva/precompiled/time_test.cc
##########
@@ -132,6 +132,44 @@ TEST(TestTime, TestCastTimestamp) {
   context.Reset();
 }
 
+TEST(TestTime, TestCastTimeUtf8) {
+  ExecutionContext context;
+  auto context_ptr = reinterpret_cast<int64_t>(&context);
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30", 7), 35130000);
+  EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30.920", 11), 35130920);
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30.1", 9),
+            castTIME_utf8(context_ptr, "9:45:30", 7) + 100);
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30.10", 10),
+            castTIME_utf8(context_ptr, "9:45:30", 7) + 100);
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30.100", 11),
+            castTIME_utf8(context_ptr, "9:45:30", 7) + 100);
+
+  // error cases
+  EXPECT_EQ(castTIME_utf8(context_ptr, "24:00:00", 8), 0);
+  EXPECT_EQ(context.get_error(), "Not a valid time value 24:00:00");
+  context.Reset();
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "00:60:00", 8), 0);
+  EXPECT_EQ(context.get_error(), "Not a valid time value 00:60:00");
+  context.Reset();
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:100", 9), 0);
+  EXPECT_EQ(context.get_error(), "Not a valid time value 00:00:100");
+  context.Reset();
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.0001", 13), 0);
+  EXPECT_EQ(context.get_error(), "Invalid millis for time value 00:00:00.0001");
+  context.Reset();
+
+  EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1000", 13), 0);
+  EXPECT_EQ(context.get_error(), "Invalid millis for time value 00:00:00.1000");

Review comment:
       some error cases such as '00.00.00.765' or '0Ab12:35' 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org