You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/05/16 14:58:12 UTC

[GitHub] [arrow] ViniciusSouzaRoque commented on a diff in pull request #12306: ARROW-15083: [C++][Gandiva] Implement Conv Function

ViniciusSouzaRoque commented on code in PR #12306:
URL: https://github.com/apache/arrow/pull/12306#discussion_r873832960


##########
cpp/src/gandiva/gdv_function_stubs.cc:
##########
@@ -296,6 +296,215 @@ CAST_NUMERIC_FROM_VARBINARY(double, arrow::DoubleType, FLOAT8)
 #undef GDV_FN_CAST_VARCHAR_INTEGER
 #undef GDV_FN_CAST_VARCHAR_REAL
 
+// Divides x, reinterpreted as an unsigned 64-bit integer, by m and returns the
+// quotient reinterpreted back as a signed 64-bit integer.
+//
+// m is expected to be a positive radix. For x >= 0 this is ordinary signed
+// division. For x < 0 the bit pattern of x is divided as an unsigned value
+// using the native unsigned division. That replaces the hand-expanded identity
+// (a + b) / c = a / c + b / c + (a % c + b % c) / c, which depended on LONG_MAX
+// (only 32 bits wide on LLP64 platforms) and was off by one whenever the
+// remainder term became negative (for example x = -5, m = 7).
+GDV_FORCE_INLINE
+int64_t unsigned_long_div(int64_t x, int32_t m) {
+  if (x >= 0) {
+    return x / m;
+  }
+  return static_cast<int64_t>(static_cast<uint64_t>(x) / static_cast<uint64_t>(m));
+}
+
+// Accumulates the elements of value[fromPos..valueLen) into a 64-bit number in
+// the given radix.
+//
+// Each element is used as a numeric digit value (presumably produced by
+// char2byte - confirm against the caller). Scanning stops at the end of the
+// buffer or at the first negative element. Returns -1 when the overflow check
+// fails; otherwise returns the accumulated value, whose bits may occupy the full
+// 64-bit range and are carried in an int64_t.
+GDV_FORCE_INLINE
+int64_t encode(int32_t radix, int32_t fromPos, const char* value, int32_t valueLen) {
+  int64_t val = 0;
+  // Once val reaches this bound the next multiply-add may no longer fit.
+  const int64_t bound = unsigned_long_div(-1 - radix, radix);
+
+  for (int i = fromPos; i < valueLen; i++) {
+    // Plain char may be unsigned; read it as signed char so a negative element
+    // terminates the scan on every platform.
+    const int digit = static_cast<signed char>(value[i]);
+    if (digit < 0) {
+      break;
+    }
+    if (val >= bound && unsigned_long_div(-1 - digit, radix) < val) {
+      return -1;
+    }
+    // Multiply-add in unsigned arithmetic: the result may need all 64 bits, and
+    // overflowing a signed 64-bit value would be undefined behaviour.
+    const uint64_t scaled = static_cast<uint64_t>(val) * static_cast<uint64_t>(radix);
+    val = static_cast<int64_t>(scaled + static_cast<uint64_t>(digit));
+  }
+  return val;
+}
+
+// Writes the digits of val in the given radix into value, right-aligned, with
+// every unused leading position set to 0.
+//
+// The buffer receives numeric digit values (0 .. radix - 1), not printable
+// characters. If valueLen is too small for all digits, only the least
+// significant valueLen digits are stored. A radix below 2 leaves the buffer
+// zero-filled.
+GDV_FORCE_INLINE
+void decode(uint64_t val, int32_t radix, char* value, int32_t valueLen) {
+  for (int i = 0; i < valueLen; i++) {
+    value[i] = static_cast<char>(0);
+  }
+
+  // A radix of 0 would divide by zero and a radix of 1 would never shrink val.
+  if (radix < 2) {
+    return;
+  }
+
+  // val is already unsigned, so native division and remainder are exact for
+  // the whole 64-bit range.
+  const uint64_t base = static_cast<uint64_t>(radix);
+  // The i >= 0 guard keeps the loop from writing in front of value[0].
+  for (int i = valueLen - 1; val != 0 && i >= 0; i--) {
+    value[i] = static_cast<char>(val % base);
+    val /= base;
+  }
+}
+
+// From Decimal to Any Base
+// Maps a digit value to its character, comparable to Character.forDigit in
+// Java. The value is first reduced modulo radix; results below 10 map onto
+// '0'..'9' and the remaining ones onto upper-case letters starting at 'A'.
+GDV_FORCE_INLINE
+char character_for_digit(int32_t value, int32_t radix) {
+  const int reduced = value % radix;
+  const int offset = (reduced < 10) ? '0' : ('A' - 10);
+  return static_cast<char>(reduced + offset);
+}
+
+// From any base to Decimal
+// Converts one character into its digit value in the given radix, comparable to
+// Character.digit in Java. Accepts '0'..'9' and letters of either case.
+//
+// Returns the digit value and leaves valid_entry untouched on success. When the
+// radix is outside 1..36, or the character is not a digit in that radix, sets
+// valid_entry to -1 and returns -1.
+GDV_FORCE_INLINE
+int64_t character_digit(char value, int32_t radix, int32_t& valid_entry) {
+  int candidate = -1;
+
+  if (radix > 0 && radix <= 36) {
+    if (value >= '0' && value <= '9') {
+      candidate = value - '0';
+    } else if (value >= 'a' && value <= 'z') {
+      candidate = value - 'a' + 10;
+    } else if (value >= 'A' && value <= 'Z') {
+      candidate = value - 'A' + 10;
+    }
+  }
+
+  // A candidate only counts as a digit when it is smaller than the radix.
+  if (candidate < 0 || candidate >= radix) {
+    valid_entry = -1;
+    return -1;
+  }
+  return candidate;
+}
+
+// Replaces, in place, every element of value[fromPos..valueLen) with the
+// character that character_for_digit returns for it in the given radix.
+GDV_FORCE_INLINE
+void byte2char(int32_t radix, int32_t fromPos, char* value, int32_t valueLen) {
+  int pos = fromPos;
+  while (pos < valueLen) {
+    value[pos] = character_for_digit(value[pos], radix);
+    ++pos;
+  }
+}
+
+// Replaces, in place, every element of value[fromPos..valueLen) with the result
+// of character_digit for it in the given radix.
+//
+// Conversion stops right after the first element for which *valid_entry is no
+// longer 1; that element has already been overwritten with the returned value.
+// The caller is expected to pass *valid_entry == 1 on entry, otherwise only one
+// element is processed.
+GDV_FORCE_INLINE
+void char2byte(int32_t radix, int32_t fromPos, char* value, int32_t valueLen,
+               int32_t* valid_entry) {
+  int pos = fromPos;
+  while (pos < valueLen) {
+    value[pos] = static_cast<char>(character_digit(value[pos], radix, *valid_entry));
+    if (*valid_entry != 1) {
+      return;
+    }
+    ++pos;
+  }
+}
+
+// Base conversion for a 64-bit integer input: renders `in` as decimal text with
+// std::to_string and forwards that text, together with from_base, to_base and
+// out_len, to conv_utf8_int32_int32, whose result is returned unchanged.
+GANDIVA_EXPORT
+const char* conv_int64_int32_int32(int64_t context, int64_t in, int32_t from_base,
+                                   int32_t to_base, int32_t* out_len) {
+  const std::string decimal_text = std::to_string(in);
+  return conv_utf8_int32_int32(context, decimal_text.data(),
+                               static_cast<int32_t>(decimal_text.length()), from_base,
+                               to_base, out_len);
+}
+
+// Base conversion for a 32-bit integer input: renders `in` as decimal text with
+// std::to_string and forwards that text, together with from_base, to_base and
+// out_len, to conv_utf8_int32_int32, whose result is returned unchanged.
+GANDIVA_EXPORT
+const char* conv_int32_int32_int32(int64_t context, int32_t in, int32_t from_base,
+                                   int32_t to_base, int32_t* out_len) {
+  const std::string decimal_text = std::to_string(in);
+  return conv_utf8_int32_int32(context, decimal_text.data(),
+                               static_cast<int32_t>(decimal_text.length()), from_base,
+                               to_base, out_len);
+}
+
+GANDIVA_EXPORT
+const char* conv_utf8_int32_int32(int64_t context, const char* in, int32_t in_len,
+                                  int32_t from_base, int32_t to_base, int32_t* out_len) {
+  if (in_len <= 0) {
+    out_len = 0;
+    return "";
+  }
+
+  int32_t valueLen = 64;
+  char* value = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, valueLen));
+  char* num = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, in_len));
+
+  if (value == nullptr || num == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    out_len = 0;
+    return "";
+  }
+
+  int fromBs = from_base;
+  int toBs = to_base;
+
+  if (fromBs < -36 || fromBs > 36 || fromBs == 0 || fromBs == 1 || abs(toBs) < -36 ||
+      abs(toBs) > 36 || abs(toBs) == 0 || abs(toBs) == 1) {
+    // Checking if the variable is in range limit
+    gdv_fn_context_set_error_msg(context,
+                                 "The numerical limit of this variable is out range");
+    *out_len = 0;
+    return "";
+  }

Review Comment:
   This check is not redundant: when toBs is numeric_limits<int>::min(), abs(toBs) cannot be represented as a positive int and in practice stays negative, so the condition (**abs(toBs) < -36**) is true.
   I believe we need to keep this validation.
   
   The check is necessary because we only handle input and output bases between -36 and +36, with the exception of 1 and 0, as specified by the Hive function.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org