You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/07/14 16:43:13 UTC

[GitHub] [arrow] anthonylouisbsb commented on a change in pull request #10711: ARROW-13322: [C++][Gandiva] Add from_unixtime hive function to gandiva

anthonylouisbsb commented on a change in pull request #10711:
URL: https://github.com/apache/arrow/pull/10711#discussion_r669769114



##########
File path: cpp/src/gandiva/precompiled/time.cc
##########
@@ -841,6 +843,161 @@ gdv_int64 castBIGINT_daytimeinterval(gdv_day_time_interval in) {
          extractDay_daytimeinterval(in) * MILLIS_IN_DAY;
 }
 
+FORCE_INLINE
+const char* from_unixtime_int64(gdv_int64 context, gdv_timestamp in, gdv_int32* out_len) {
+  gdv_int64 year = extractYear_timestamp(in);
+  gdv_int64 month = extractMonth_timestamp(in);
+  gdv_int64 day = extractDay_timestamp(in);
+  gdv_int64 hour = extractHour_timestamp(in);
+  gdv_int64 minute = extractMinute_timestamp(in);
+  gdv_int64 second = extractSecond_timestamp(in);
+
+  static const int kTimeStampStringLen = 19;
+  const int char_buffer_length = kTimeStampStringLen + 1;  // snprintf adds \0
+  char char_buffer[char_buffer_length];
+
+  // yyyy-MM-dd hh:mm:ss
+  int res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64 " %02" PRId64 ":%02" PRId64
+                     ":%02" PRId64,
+                     year, month, day, hour, minute, second);
+  if (res < 0) {
+    gdv_fn_context_set_error_msg(context, "Could not format the timestamp");
+    *out_len = 0;
+    return "";
+  }
+
+  *out_len = kTimeStampStringLen;
+
+  if (*out_len <= 0) {
+    if (*out_len < 0) {
+      gdv_fn_context_set_error_msg(context, "Length of output string cannot be negative");
+    }
+    *out_len = 0;
+    return "";
+  }
+
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  memcpy(ret, char_buffer, *out_len);
+  return ret;
+}
+
+FORCE_INLINE
+const char* from_unixtime_int64_utf8(gdv_int64 context, gdv_timestamp in,
+                                     const char* pattern, gdv_int32 pattern_len,
+                                     gdv_int32* out_len) {
+  gdv_int64 year = extractYear_timestamp(in);
+  gdv_int64 month = extractMonth_timestamp(in);
+  gdv_int64 day = extractDay_timestamp(in);
+  gdv_int64 hour = extractHour_timestamp(in);
+  gdv_int64 minute = extractMinute_timestamp(in);
+  gdv_int64 second = extractSecond_timestamp(in);
+  gdv_int64 millis = in % MILLIS_IN_SEC;
+
+  if (pattern_len <= 0) {
+    gdv_fn_context_set_error_msg(context, "Invalid allowed pattern size");
+    *out_len = 0;
+    return "";
+  }
+
+  static const int kTimeStampStringLen = pattern_len;
+  const int char_buffer_length = kTimeStampStringLen + 1;  // snprintf adds \0
+  char char_buffer[char_buffer_length];
+
+  const char* regex_format =
+      "y{4}(-[M]{2})?+.*?(-[d]{2})?+.*?( [h]{2})?+.*?"
+      "(:[mm]{2})?+.*?(:[s]{2})?+.*?(.[s]{3})?+.*?";
+  bool match = std::regex_match(pattern, std::regex(regex_format));
+
+  if (!match) {
+    gdv_fn_context_set_error_msg(context, "Invalid allowed pattern");
+    *out_len = 0;
+    return "";
+  }
+
+  // length from pattern
+  int res = 0;
+
+  switch (pattern_len) {
+    // yyyy
+    case 4:
+      res = snprintf(char_buffer, char_buffer_length, "%04" PRId64, year);
+      break;
+    // yyyy-MM
+    case 7:
+      res = snprintf(char_buffer, char_buffer_length, "%04" PRId64 "-%02" PRId64, year,
+                     month);
+      break;
+    // yyyy-MM-dd
+    case 10:

Review comment:
       What happens if the user defines a pattern like `dd-MM-yyyy`? Will the method process it correctly?

##########
File path: cpp/src/gandiva/precompiled/time.cc
##########
@@ -841,6 +843,161 @@ gdv_int64 castBIGINT_daytimeinterval(gdv_day_time_interval in) {
          extractDay_daytimeinterval(in) * MILLIS_IN_DAY;
 }
 
+FORCE_INLINE
+const char* from_unixtime_int64(gdv_int64 context, gdv_timestamp in, gdv_int32* out_len) {
+  gdv_int64 year = extractYear_timestamp(in);
+  gdv_int64 month = extractMonth_timestamp(in);
+  gdv_int64 day = extractDay_timestamp(in);
+  gdv_int64 hour = extractHour_timestamp(in);
+  gdv_int64 minute = extractMinute_timestamp(in);
+  gdv_int64 second = extractSecond_timestamp(in);
+
+  static const int kTimeStampStringLen = 19;
+  const int char_buffer_length = kTimeStampStringLen + 1;  // snprintf adds \0
+  char char_buffer[char_buffer_length];
+
+  // yyyy-MM-dd hh:mm:ss
+  int res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64 " %02" PRId64 ":%02" PRId64
+                     ":%02" PRId64,
+                     year, month, day, hour, minute, second);
+  if (res < 0) {
+    gdv_fn_context_set_error_msg(context, "Could not format the timestamp");
+    *out_len = 0;
+    return "";
+  }
+
+  *out_len = kTimeStampStringLen;
+
+  if (*out_len <= 0) {
+    if (*out_len < 0) {
+      gdv_fn_context_set_error_msg(context, "Length of output string cannot be negative");
+    }
+    *out_len = 0;
+    return "";
+  }
+
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  memcpy(ret, char_buffer, *out_len);
+  return ret;
+}
+
+FORCE_INLINE
+const char* from_unixtime_int64_utf8(gdv_int64 context, gdv_timestamp in,
+                                     const char* pattern, gdv_int32 pattern_len,
+                                     gdv_int32* out_len) {
+  gdv_int64 year = extractYear_timestamp(in);
+  gdv_int64 month = extractMonth_timestamp(in);
+  gdv_int64 day = extractDay_timestamp(in);
+  gdv_int64 hour = extractHour_timestamp(in);
+  gdv_int64 minute = extractMinute_timestamp(in);
+  gdv_int64 second = extractSecond_timestamp(in);
+  gdv_int64 millis = in % MILLIS_IN_SEC;
+
+  if (pattern_len <= 0) {
+    gdv_fn_context_set_error_msg(context, "Invalid allowed pattern size");
+    *out_len = 0;
+    return "";
+  }
+
+  static const int kTimeStampStringLen = pattern_len;
+  const int char_buffer_length = kTimeStampStringLen + 1;  // snprintf adds \0
+  char char_buffer[char_buffer_length];
+
+  const char* regex_format =
+      "y{4}(-[M]{2})?+.*?(-[d]{2})?+.*?( [h]{2})?+.*?"
+      "(:[mm]{2})?+.*?(:[s]{2})?+.*?(.[s]{3})?+.*?";

Review comment:
       I think this regex does not cover all the possible patterns that the function can receive, for example `MM-yyyy-dd`.
   
   The [Hive](https://github.com/apache/hive/blob/a2d50efecdeb066b767ce0ae0850103d5ee35933/hplsql/src/main/java/org/apache/hive/hplsql/functions/FunctionDatetime.java#L173) code uses an external library to produce that result. I think you should do the same thing.
   
   [strftime](https://www.cplusplus.com/reference/ctime/strftime/) is an example of a standard library function that you can use to format a time value for output.

##########
File path: cpp/src/gandiva/precompiled/time.cc
##########
@@ -841,6 +843,161 @@ gdv_int64 castBIGINT_daytimeinterval(gdv_day_time_interval in) {
          extractDay_daytimeinterval(in) * MILLIS_IN_DAY;
 }
 
+FORCE_INLINE
+const char* from_unixtime_int64(gdv_int64 context, gdv_timestamp in, gdv_int32* out_len) {
+  gdv_int64 year = extractYear_timestamp(in);
+  gdv_int64 month = extractMonth_timestamp(in);
+  gdv_int64 day = extractDay_timestamp(in);
+  gdv_int64 hour = extractHour_timestamp(in);
+  gdv_int64 minute = extractMinute_timestamp(in);
+  gdv_int64 second = extractSecond_timestamp(in);
+
+  static const int kTimeStampStringLen = 19;
+  const int char_buffer_length = kTimeStampStringLen + 1;  // snprintf adds \0
+  char char_buffer[char_buffer_length];
+
+  // yyyy-MM-dd hh:mm:ss
+  int res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64 " %02" PRId64 ":%02" PRId64
+                     ":%02" PRId64,
+                     year, month, day, hour, minute, second);
+  if (res < 0) {
+    gdv_fn_context_set_error_msg(context, "Could not format the timestamp");
+    *out_len = 0;
+    return "";
+  }
+
+  *out_len = kTimeStampStringLen;
+
+  if (*out_len <= 0) {
+    if (*out_len < 0) {
+      gdv_fn_context_set_error_msg(context, "Length of output string cannot be negative");
+    }
+    *out_len = 0;
+    return "";
+  }
+
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  memcpy(ret, char_buffer, *out_len);
+  return ret;
+}

Review comment:
       The function you created is very similar to [this other one](https://github.com/apache/arrow/blob/f1a7b0d765ad925cc764ebd3e512f02bcdedfd41/cpp/src/gandiva/precompiled/time.cc#L778). If they are equivalent, you do not need to repeat the code; just add an alias in the function registry for the existing function.

##########
File path: cpp/src/gandiva/precompiled/time.cc
##########
@@ -841,6 +843,161 @@ gdv_int64 castBIGINT_daytimeinterval(gdv_day_time_interval in) {
          extractDay_daytimeinterval(in) * MILLIS_IN_DAY;
 }
 
+FORCE_INLINE
+const char* from_unixtime_int64(gdv_int64 context, gdv_timestamp in, gdv_int32* out_len) {
+  gdv_int64 year = extractYear_timestamp(in);
+  gdv_int64 month = extractMonth_timestamp(in);
+  gdv_int64 day = extractDay_timestamp(in);
+  gdv_int64 hour = extractHour_timestamp(in);
+  gdv_int64 minute = extractMinute_timestamp(in);
+  gdv_int64 second = extractSecond_timestamp(in);
+
+  static const int kTimeStampStringLen = 19;
+  const int char_buffer_length = kTimeStampStringLen + 1;  // snprintf adds \0
+  char char_buffer[char_buffer_length];
+
+  // yyyy-MM-dd hh:mm:ss
+  int res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64 " %02" PRId64 ":%02" PRId64
+                     ":%02" PRId64,
+                     year, month, day, hour, minute, second);
+  if (res < 0) {
+    gdv_fn_context_set_error_msg(context, "Could not format the timestamp");
+    *out_len = 0;
+    return "";
+  }
+
+  *out_len = kTimeStampStringLen;
+
+  if (*out_len <= 0) {
+    if (*out_len < 0) {
+      gdv_fn_context_set_error_msg(context, "Length of output string cannot be negative");
+    }
+    *out_len = 0;
+    return "";
+  }
+
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  memcpy(ret, char_buffer, *out_len);
+  return ret;
+}
+
+FORCE_INLINE
+const char* from_unixtime_int64_utf8(gdv_int64 context, gdv_timestamp in,
+                                     const char* pattern, gdv_int32 pattern_len,
+                                     gdv_int32* out_len) {
+  gdv_int64 year = extractYear_timestamp(in);
+  gdv_int64 month = extractMonth_timestamp(in);
+  gdv_int64 day = extractDay_timestamp(in);
+  gdv_int64 hour = extractHour_timestamp(in);
+  gdv_int64 minute = extractMinute_timestamp(in);
+  gdv_int64 second = extractSecond_timestamp(in);
+  gdv_int64 millis = in % MILLIS_IN_SEC;
+
+  if (pattern_len <= 0) {
+    gdv_fn_context_set_error_msg(context, "Invalid allowed pattern size");
+    *out_len = 0;
+    return "";
+  }
+
+  static const int kTimeStampStringLen = pattern_len;
+  const int char_buffer_length = kTimeStampStringLen + 1;  // snprintf adds \0
+  char char_buffer[char_buffer_length];
+
+  const char* regex_format =
+      "y{4}(-[M]{2})?+.*?(-[d]{2})?+.*?( [h]{2})?+.*?"
+      "(:[mm]{2})?+.*?(:[s]{2})?+.*?(.[s]{3})?+.*?";
+  bool match = std::regex_match(pattern, std::regex(regex_format));

Review comment:
       You should move this function to gdv_stubs, and it must use the RE2 library to process the regex.

##########
File path: cpp/src/gandiva/precompiled/time.cc
##########
@@ -841,6 +843,161 @@ gdv_int64 castBIGINT_daytimeinterval(gdv_day_time_interval in) {
          extractDay_daytimeinterval(in) * MILLIS_IN_DAY;
 }
 
+FORCE_INLINE
+const char* from_unixtime_int64(gdv_int64 context, gdv_timestamp in, gdv_int32* out_len) {
+  gdv_int64 year = extractYear_timestamp(in);
+  gdv_int64 month = extractMonth_timestamp(in);
+  gdv_int64 day = extractDay_timestamp(in);
+  gdv_int64 hour = extractHour_timestamp(in);
+  gdv_int64 minute = extractMinute_timestamp(in);
+  gdv_int64 second = extractSecond_timestamp(in);
+
+  static const int kTimeStampStringLen = 19;
+  const int char_buffer_length = kTimeStampStringLen + 1;  // snprintf adds \0
+  char char_buffer[char_buffer_length];
+
+  // yyyy-MM-dd hh:mm:ss
+  int res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64 " %02" PRId64 ":%02" PRId64
+                     ":%02" PRId64,
+                     year, month, day, hour, minute, second);
+  if (res < 0) {
+    gdv_fn_context_set_error_msg(context, "Could not format the timestamp");
+    *out_len = 0;
+    return "";
+  }
+
+  *out_len = kTimeStampStringLen;
+
+  if (*out_len <= 0) {
+    if (*out_len < 0) {
+      gdv_fn_context_set_error_msg(context, "Length of output string cannot be negative");
+    }
+    *out_len = 0;
+    return "";
+  }
+
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  memcpy(ret, char_buffer, *out_len);
+  return ret;
+}
+
+FORCE_INLINE
+const char* from_unixtime_int64_utf8(gdv_int64 context, gdv_timestamp in,
+                                     const char* pattern, gdv_int32 pattern_len,
+                                     gdv_int32* out_len) {
+  gdv_int64 year = extractYear_timestamp(in);
+  gdv_int64 month = extractMonth_timestamp(in);
+  gdv_int64 day = extractDay_timestamp(in);
+  gdv_int64 hour = extractHour_timestamp(in);
+  gdv_int64 minute = extractMinute_timestamp(in);
+  gdv_int64 second = extractSecond_timestamp(in);
+  gdv_int64 millis = in % MILLIS_IN_SEC;
+
+  if (pattern_len <= 0) {
+    gdv_fn_context_set_error_msg(context, "Invalid allowed pattern size");
+    *out_len = 0;
+    return "";
+  }
+
+  static const int kTimeStampStringLen = pattern_len;
+  const int char_buffer_length = kTimeStampStringLen + 1;  // snprintf adds \0
+  char char_buffer[char_buffer_length];
+
+  const char* regex_format =
+      "y{4}(-[M]{2})?+.*?(-[d]{2})?+.*?( [h]{2})?+.*?"
+      "(:[mm]{2})?+.*?(:[s]{2})?+.*?(.[s]{3})?+.*?";
+  bool match = std::regex_match(pattern, std::regex(regex_format));
+
+  if (!match) {
+    gdv_fn_context_set_error_msg(context, "Invalid allowed pattern");
+    *out_len = 0;
+    return "";
+  }
+
+  // length from pattern
+  int res = 0;
+
+  switch (pattern_len) {
+    // yyyy
+    case 4:
+      res = snprintf(char_buffer, char_buffer_length, "%04" PRId64, year);
+      break;
+    // yyyy-MM
+    case 7:
+      res = snprintf(char_buffer, char_buffer_length, "%04" PRId64 "-%02" PRId64, year,
+                     month);
+      break;
+    // yyyy-MM-dd
+    case 10:
+      res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64, year, month, day);
+      break;
+    // yyyy-MM-dd hh
+    case 13:

Review comment:
       Same as the case above: what happens if the user defines a different pattern with the same length, such as `dd-MM-yyyy hh`?

##########
File path: cpp/src/gandiva/precompiled/time.cc
##########
@@ -841,6 +843,161 @@ gdv_int64 castBIGINT_daytimeinterval(gdv_day_time_interval in) {
          extractDay_daytimeinterval(in) * MILLIS_IN_DAY;
 }
 
+FORCE_INLINE
+const char* from_unixtime_int64(gdv_int64 context, gdv_timestamp in, gdv_int32* out_len) {
+  gdv_int64 year = extractYear_timestamp(in);
+  gdv_int64 month = extractMonth_timestamp(in);
+  gdv_int64 day = extractDay_timestamp(in);
+  gdv_int64 hour = extractHour_timestamp(in);
+  gdv_int64 minute = extractMinute_timestamp(in);
+  gdv_int64 second = extractSecond_timestamp(in);
+
+  static const int kTimeStampStringLen = 19;
+  const int char_buffer_length = kTimeStampStringLen + 1;  // snprintf adds \0
+  char char_buffer[char_buffer_length];
+
+  // yyyy-MM-dd hh:mm:ss
+  int res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64 " %02" PRId64 ":%02" PRId64
+                     ":%02" PRId64,
+                     year, month, day, hour, minute, second);
+  if (res < 0) {
+    gdv_fn_context_set_error_msg(context, "Could not format the timestamp");
+    *out_len = 0;
+    return "";
+  }
+
+  *out_len = kTimeStampStringLen;
+
+  if (*out_len <= 0) {
+    if (*out_len < 0) {
+      gdv_fn_context_set_error_msg(context, "Length of output string cannot be negative");
+    }
+    *out_len = 0;
+    return "";
+  }
+
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  memcpy(ret, char_buffer, *out_len);
+  return ret;
+}
+
+FORCE_INLINE
+const char* from_unixtime_int64_utf8(gdv_int64 context, gdv_timestamp in,
+                                     const char* pattern, gdv_int32 pattern_len,
+                                     gdv_int32* out_len) {
+  gdv_int64 year = extractYear_timestamp(in);
+  gdv_int64 month = extractMonth_timestamp(in);
+  gdv_int64 day = extractDay_timestamp(in);
+  gdv_int64 hour = extractHour_timestamp(in);
+  gdv_int64 minute = extractMinute_timestamp(in);
+  gdv_int64 second = extractSecond_timestamp(in);
+  gdv_int64 millis = in % MILLIS_IN_SEC;
+
+  if (pattern_len <= 0) {
+    gdv_fn_context_set_error_msg(context, "Invalid allowed pattern size");
+    *out_len = 0;
+    return "";
+  }
+
+  static const int kTimeStampStringLen = pattern_len;
+  const int char_buffer_length = kTimeStampStringLen + 1;  // snprintf adds \0
+  char char_buffer[char_buffer_length];
+
+  const char* regex_format =
+      "y{4}(-[M]{2})?+.*?(-[d]{2})?+.*?( [h]{2})?+.*?"
+      "(:[mm]{2})?+.*?(:[s]{2})?+.*?(.[s]{3})?+.*?";
+  bool match = std::regex_match(pattern, std::regex(regex_format));
+
+  if (!match) {
+    gdv_fn_context_set_error_msg(context, "Invalid allowed pattern");
+    *out_len = 0;
+    return "";
+  }
+
+  // length from pattern
+  int res = 0;
+
+  switch (pattern_len) {
+    // yyyy
+    case 4:
+      res = snprintf(char_buffer, char_buffer_length, "%04" PRId64, year);
+      break;
+    // yyyy-MM
+    case 7:
+      res = snprintf(char_buffer, char_buffer_length, "%04" PRId64 "-%02" PRId64, year,
+                     month);
+      break;
+    // yyyy-MM-dd
+    case 10:
+      res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64, year, month, day);
+      break;
+    // yyyy-MM-dd hh
+    case 13:
+      res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64 " %02" PRId64, year, month,
+                     day, hour);
+      break;
+    // yyyy-MM-dd hh:mm
+    case 16:
+      res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64 " %02" PRId64 ":%02" PRId64,
+                     year, month, day, hour, minute);
+      break;
+    // yyyy-MM-dd hh:mm:ss
+    case 19:
+      res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64 " %02" PRId64 ":%02" PRId64
+                     ":%02" PRId64,
+                     year, month, day, hour, minute, second);
+      break;
+    // yyyy-MM-dd hh:mm:ss.sss
+    case 23:

Review comment:
       Ditto

##########
File path: cpp/src/gandiva/precompiled/time.cc
##########
@@ -841,6 +843,161 @@ gdv_int64 castBIGINT_daytimeinterval(gdv_day_time_interval in) {
          extractDay_daytimeinterval(in) * MILLIS_IN_DAY;
 }
 
+FORCE_INLINE
+const char* from_unixtime_int64(gdv_int64 context, gdv_timestamp in, gdv_int32* out_len) {
+  gdv_int64 year = extractYear_timestamp(in);
+  gdv_int64 month = extractMonth_timestamp(in);
+  gdv_int64 day = extractDay_timestamp(in);
+  gdv_int64 hour = extractHour_timestamp(in);
+  gdv_int64 minute = extractMinute_timestamp(in);
+  gdv_int64 second = extractSecond_timestamp(in);
+
+  static const int kTimeStampStringLen = 19;
+  const int char_buffer_length = kTimeStampStringLen + 1;  // snprintf adds \0
+  char char_buffer[char_buffer_length];
+
+  // yyyy-MM-dd hh:mm:ss
+  int res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64 " %02" PRId64 ":%02" PRId64
+                     ":%02" PRId64,
+                     year, month, day, hour, minute, second);
+  if (res < 0) {
+    gdv_fn_context_set_error_msg(context, "Could not format the timestamp");
+    *out_len = 0;
+    return "";
+  }
+
+  *out_len = kTimeStampStringLen;
+
+  if (*out_len <= 0) {
+    if (*out_len < 0) {
+      gdv_fn_context_set_error_msg(context, "Length of output string cannot be negative");
+    }
+    *out_len = 0;
+    return "";
+  }
+
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  memcpy(ret, char_buffer, *out_len);
+  return ret;
+}
+
+FORCE_INLINE
+const char* from_unixtime_int64_utf8(gdv_int64 context, gdv_timestamp in,
+                                     const char* pattern, gdv_int32 pattern_len,
+                                     gdv_int32* out_len) {
+  gdv_int64 year = extractYear_timestamp(in);
+  gdv_int64 month = extractMonth_timestamp(in);
+  gdv_int64 day = extractDay_timestamp(in);
+  gdv_int64 hour = extractHour_timestamp(in);
+  gdv_int64 minute = extractMinute_timestamp(in);
+  gdv_int64 second = extractSecond_timestamp(in);
+  gdv_int64 millis = in % MILLIS_IN_SEC;
+
+  if (pattern_len <= 0) {
+    gdv_fn_context_set_error_msg(context, "Invalid allowed pattern size");
+    *out_len = 0;
+    return "";
+  }
+
+  static const int kTimeStampStringLen = pattern_len;
+  const int char_buffer_length = kTimeStampStringLen + 1;  // snprintf adds \0
+  char char_buffer[char_buffer_length];
+
+  const char* regex_format =
+      "y{4}(-[M]{2})?+.*?(-[d]{2})?+.*?( [h]{2})?+.*?"
+      "(:[mm]{2})?+.*?(:[s]{2})?+.*?(.[s]{3})?+.*?";
+  bool match = std::regex_match(pattern, std::regex(regex_format));
+
+  if (!match) {
+    gdv_fn_context_set_error_msg(context, "Invalid allowed pattern");
+    *out_len = 0;
+    return "";
+  }
+
+  // length from pattern
+  int res = 0;
+
+  switch (pattern_len) {
+    // yyyy
+    case 4:
+      res = snprintf(char_buffer, char_buffer_length, "%04" PRId64, year);
+      break;
+    // yyyy-MM
+    case 7:
+      res = snprintf(char_buffer, char_buffer_length, "%04" PRId64 "-%02" PRId64, year,
+                     month);
+      break;
+    // yyyy-MM-dd
+    case 10:
+      res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64, year, month, day);
+      break;
+    // yyyy-MM-dd hh
+    case 13:
+      res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64 " %02" PRId64, year, month,
+                     day, hour);
+      break;
+    // yyyy-MM-dd hh:mm
+    case 16:
+      res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64 " %02" PRId64 ":%02" PRId64,
+                     year, month, day, hour, minute);
+      break;
+    // yyyy-MM-dd hh:mm:ss
+    case 19:

Review comment:
       Ditto

##########
File path: cpp/src/gandiva/precompiled/time_test.cc
##########
@@ -839,4 +839,86 @@ TEST(TestTime, TestToTimeNumeric) {
   EXPECT_EQ(expected_output, to_time_float64(3601.500));
 }
 
-}  // namespace gandiva
+TEST(TestTime, TestFromUnixtimeWithoutPattern) {

Review comment:
       Please add some integration tests for this function, either in Java or in the `tests` folder.

##########
File path: cpp/src/gandiva/precompiled/time.cc
##########
@@ -841,6 +843,161 @@ gdv_int64 castBIGINT_daytimeinterval(gdv_day_time_interval in) {
          extractDay_daytimeinterval(in) * MILLIS_IN_DAY;
 }
 
+FORCE_INLINE
+const char* from_unixtime_int64(gdv_int64 context, gdv_timestamp in, gdv_int32* out_len) {
+  gdv_int64 year = extractYear_timestamp(in);
+  gdv_int64 month = extractMonth_timestamp(in);
+  gdv_int64 day = extractDay_timestamp(in);
+  gdv_int64 hour = extractHour_timestamp(in);
+  gdv_int64 minute = extractMinute_timestamp(in);
+  gdv_int64 second = extractSecond_timestamp(in);
+
+  static const int kTimeStampStringLen = 19;
+  const int char_buffer_length = kTimeStampStringLen + 1;  // snprintf adds \0
+  char char_buffer[char_buffer_length];
+
+  // yyyy-MM-dd hh:mm:ss
+  int res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64 " %02" PRId64 ":%02" PRId64
+                     ":%02" PRId64,
+                     year, month, day, hour, minute, second);
+  if (res < 0) {
+    gdv_fn_context_set_error_msg(context, "Could not format the timestamp");
+    *out_len = 0;
+    return "";
+  }
+
+  *out_len = kTimeStampStringLen;
+
+  if (*out_len <= 0) {
+    if (*out_len < 0) {
+      gdv_fn_context_set_error_msg(context, "Length of output string cannot be negative");
+    }
+    *out_len = 0;
+    return "";
+  }
+
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  memcpy(ret, char_buffer, *out_len);
+  return ret;
+}
+
+FORCE_INLINE
+const char* from_unixtime_int64_utf8(gdv_int64 context, gdv_timestamp in,
+                                     const char* pattern, gdv_int32 pattern_len,
+                                     gdv_int32* out_len) {
+  gdv_int64 year = extractYear_timestamp(in);
+  gdv_int64 month = extractMonth_timestamp(in);
+  gdv_int64 day = extractDay_timestamp(in);
+  gdv_int64 hour = extractHour_timestamp(in);
+  gdv_int64 minute = extractMinute_timestamp(in);
+  gdv_int64 second = extractSecond_timestamp(in);
+  gdv_int64 millis = in % MILLIS_IN_SEC;
+
+  if (pattern_len <= 0) {
+    gdv_fn_context_set_error_msg(context, "Invalid allowed pattern size");
+    *out_len = 0;
+    return "";
+  }
+
+  static const int kTimeStampStringLen = pattern_len;
+  const int char_buffer_length = kTimeStampStringLen + 1;  // snprintf adds \0
+  char char_buffer[char_buffer_length];
+
+  const char* regex_format =
+      "y{4}(-[M]{2})?+.*?(-[d]{2})?+.*?( [h]{2})?+.*?"
+      "(:[mm]{2})?+.*?(:[s]{2})?+.*?(.[s]{3})?+.*?";
+  bool match = std::regex_match(pattern, std::regex(regex_format));
+
+  if (!match) {
+    gdv_fn_context_set_error_msg(context, "Invalid allowed pattern");
+    *out_len = 0;
+    return "";
+  }
+
+  // length from pattern
+  int res = 0;
+
+  switch (pattern_len) {
+    // yyyy
+    case 4:
+      res = snprintf(char_buffer, char_buffer_length, "%04" PRId64, year);
+      break;
+    // yyyy-MM
+    case 7:
+      res = snprintf(char_buffer, char_buffer_length, "%04" PRId64 "-%02" PRId64, year,
+                     month);
+      break;
+    // yyyy-MM-dd
+    case 10:
+      res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64, year, month, day);
+      break;
+    // yyyy-MM-dd hh
+    case 13:
+      res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64 " %02" PRId64, year, month,
+                     day, hour);
+      break;
+    // yyyy-MM-dd hh:mm
+    case 16:

Review comment:
       Ditto




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org