You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/04/29 07:56:00 UTC

[GitHub] [arrow] projjal commented on a change in pull request #10195: ARROW-12595: [C++][Gandiva] Implement TO_HEX([binary] field) function

projjal commented on a change in pull request #10195:
URL: https://github.com/apache/arrow/pull/10195#discussion_r622803545



##########
File path: cpp/src/gandiva/precompiled/string_ops.cc
##########
@@ -1520,4 +1521,42 @@ const char* binary_string(gdv_int64 context, const char* text, gdv_int32 text_le
   return ret;
 }
 
+// Gets a binary object and returns its hexadecimal representation. That representation
+// maps each byte in the input to a 2-length string containing a hexadecimal number.
+// - Examples:
+//     - foo -> 666F6F = 66[f] 6F[o] 6F[o]
+//     - bar -> 626172 = 62[b] 61[a] 72[r]
+FORCE_INLINE
+const char* to_hex_binary(gdv_int64 context, const char* text, gdv_int32 text_len,
+                          gdv_int32* out_len) {
+  if (text_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  auto ret =
+      reinterpret_cast<gdv_utf8>(gdv_fn_context_arena_malloc(context, text_len * 2));
+
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  gdv_uint32 ret_index = 0;
+  gdv_uint32 max_len = static_cast<gdv_uint32>(text_len) * 2;
+  gdv_uint32 max_char_to_write = 4;
+
+  for (gdv_int32 i = 0; i < text_len; i++) {
+    DCHECK(ret_index >= 0 && ret_index < max_len);

Review comment:
       this is probably not required

##########
File path: cpp/src/gandiva/precompiled/string_ops.cc
##########
@@ -1520,4 +1521,42 @@ const char* binary_string(gdv_int64 context, const char* text, gdv_int32 text_le
   return ret;
 }
 
+// Gets a binary object and returns its hexadecimal representation. That representation
+// maps each byte in the input to a 2-length string containing a hexadecimal number.
+// - Examples:
+//     - foo -> 666F6F = 66[f] 6F[o] 6F[o]
+//     - bar -> 626172 = 62[b] 61[a] 72[r]
+FORCE_INLINE
+const char* to_hex_binary(gdv_int64 context, const char* text, gdv_int32 text_len,
+                          gdv_int32* out_len) {
+  if (text_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  auto ret =
+      reinterpret_cast<gdv_utf8>(gdv_fn_context_arena_malloc(context, text_len * 2));
+
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  gdv_uint32 ret_index = 0;

Review comment:
       nit: use normal int32_t in such cases. gdv_* is only for macros

##########
File path: cpp/src/gandiva/precompiled/string_ops_test.cc
##########
@@ -1088,4 +1088,53 @@ TEST(TestStringOps, TestSplitPart) {
   EXPECT_EQ(std::string(out_str, out_len), "ååçåå");
 }
 
+TEST(TestStringOps, TestToHex) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+  gdv_int32 in_len = 0;
+  const char* out_str;
+
+  in_len = 10;
+  out_str = to_hex_binary(ctx_ptr, "TestString", in_len, &out_len);

Review comment:
       These tests are not clear. Why not use an explicit byte array like char array[] = { 0xB0, 0x11, 0xA2 }

##########
File path: cpp/src/gandiva/precompiled/string_ops.cc
##########
@@ -1520,4 +1521,42 @@ const char* binary_string(gdv_int64 context, const char* text, gdv_int32 text_le
   return ret;
 }
 
+// Gets a binary object and returns its hexadecimal representation. That representation
+// maps each byte in the input to a 2-length string containing a hexadecimal number.
+// - Examples:
+//     - foo -> 666F6F = 66[f] 6F[o] 6F[o]
+//     - bar -> 626172 = 62[b] 61[a] 72[r]
+FORCE_INLINE
+const char* to_hex_binary(gdv_int64 context, const char* text, gdv_int32 text_len,
+                          gdv_int32* out_len) {
+  if (text_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  auto ret =
+      reinterpret_cast<gdv_utf8>(gdv_fn_context_arena_malloc(context, text_len * 2));
+
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  gdv_uint32 ret_index = 0;
+  gdv_uint32 max_len = static_cast<gdv_uint32>(text_len) * 2;
+  gdv_uint32 max_char_to_write = 4;
+
+  for (gdv_int32 i = 0; i < text_len; i++) {
+    DCHECK(ret_index >= 0 && ret_index < max_len);
+
+    gdv_int32 ch = static_cast<gdv_int32>(text[i]) & 0xFF;
+
+    ret_index += snprintf(ret + ret_index, max_char_to_write, "%02X", ch);

Review comment:
       since snprintf adds a null character shouldn't max_len be 2 * text_len + 1




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org