You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/05/12 11:38:33 UTC

[GitHub] [arrow] jpedroantunes commented on a change in pull request #10023: ARROW-12378: [C++][Gandiva] Implement castVARBINARY functions

jpedroantunes commented on a change in pull request #10023:
URL: https://github.com/apache/arrow/pull/10023#discussion_r630961867



##########
File path: cpp/src/gandiva/gdv_function_stubs.cc
##########
@@ -307,85 +307,114 @@ CAST_NUMERIC_FROM_STRING(double, arrow::DoubleType, FLOAT8)
 
 #undef CAST_NUMERIC_FROM_STRING
 
-#define GDV_FN_CAST_VARCHAR_INTEGER(IN_TYPE, ARROW_TYPE)                                 \
-  GANDIVA_EXPORT                                                                         \
-  const char* gdv_fn_castVARCHAR_##IN_TYPE##_int64(int64_t context, gdv_##IN_TYPE value, \
-                                                   int64_t len, int32_t * out_len) {     \
-    if (len < 0) {                                                                       \
-      gdv_fn_context_set_error_msg(context, "Buffer length can not be negative");        \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    if (len == 0) {                                                                      \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    arrow::internal::StringFormatter<arrow::ARROW_TYPE> formatter;                       \
-    char* ret = reinterpret_cast<char*>(                                                 \
-        gdv_fn_context_arena_malloc(context, static_cast<int32_t>(len)));                \
-    if (ret == nullptr) {                                                                \
-      gdv_fn_context_set_error_msg(context, "Could not allocate memory");                \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    arrow::Status status = formatter(value, [&](arrow::util::string_view v) {            \
-      int64_t size = static_cast<int64_t>(v.size());                                     \
-      *out_len = static_cast<int32_t>(len < size ? len : size);                          \
-      memcpy(ret, v.data(), *out_len);                                                   \
-      return arrow::Status::OK();                                                        \
-    });                                                                                  \
-    if (!status.ok()) {                                                                  \
-      std::string err = "Could not cast " + std::to_string(value) + " to string";        \
-      gdv_fn_context_set_error_msg(context, err.c_str());                                \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    return ret;                                                                          \
+// Add functions for castVARBINARY for utf8 and binary
+#define CAST_VARBINARY_FROM_STRING_AND_BINARY(TYPE_NAME)                               \

Review comment:
       Solved

##########
File path: cpp/src/gandiva/gdv_function_stubs.cc
##########
@@ -307,85 +307,114 @@ CAST_NUMERIC_FROM_STRING(double, arrow::DoubleType, FLOAT8)
 
 #undef CAST_NUMERIC_FROM_STRING
 
-#define GDV_FN_CAST_VARCHAR_INTEGER(IN_TYPE, ARROW_TYPE)                                 \
-  GANDIVA_EXPORT                                                                         \
-  const char* gdv_fn_castVARCHAR_##IN_TYPE##_int64(int64_t context, gdv_##IN_TYPE value, \
-                                                   int64_t len, int32_t * out_len) {     \
-    if (len < 0) {                                                                       \
-      gdv_fn_context_set_error_msg(context, "Buffer length can not be negative");        \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    if (len == 0) {                                                                      \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    arrow::internal::StringFormatter<arrow::ARROW_TYPE> formatter;                       \
-    char* ret = reinterpret_cast<char*>(                                                 \
-        gdv_fn_context_arena_malloc(context, static_cast<int32_t>(len)));                \
-    if (ret == nullptr) {                                                                \
-      gdv_fn_context_set_error_msg(context, "Could not allocate memory");                \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    arrow::Status status = formatter(value, [&](arrow::util::string_view v) {            \
-      int64_t size = static_cast<int64_t>(v.size());                                     \
-      *out_len = static_cast<int32_t>(len < size ? len : size);                          \
-      memcpy(ret, v.data(), *out_len);                                                   \
-      return arrow::Status::OK();                                                        \
-    });                                                                                  \
-    if (!status.ok()) {                                                                  \
-      std::string err = "Could not cast " + std::to_string(value) + " to string";        \
-      gdv_fn_context_set_error_msg(context, err.c_str());                                \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    return ret;                                                                          \
+// Add functions for castVARBINARY for utf8 and binary
+#define CAST_VARBINARY_FROM_STRING_AND_BINARY(TYPE_NAME)                               \
+  GANDIVA_EXPORT                                                                       \
+  const char* gdv_fn_castVARBINARY_##TYPE_NAME(int64_t context, const char* data,      \
+                                               int32_t data_len, int64_t out_len,      \
+                                               int64_t* out_length) {                  \
+    if (out_len < 0) {                                                                 \
+      gdv_fn_context_set_error_msg(context, "Output buffer length can't be negative"); \
+      *out_length = 0;                                                                 \
+      return "";                                                                       \
+    }                                                                                  \
+                                                                                       \
+    if (out_len >= data_len || out_len == 0) {                                         \
+      *out_length = data_len;                                                          \
+    } else {                                                                           \
+      *out_length = out_len;                                                           \
+    }                                                                                  \
+    return data;                                                                       \
   }
 
-#define GDV_FN_CAST_VARCHAR_REAL(IN_TYPE, ARROW_TYPE)                                    \
-  GANDIVA_EXPORT                                                                         \
-  const char* gdv_fn_castVARCHAR_##IN_TYPE##_int64(int64_t context, gdv_##IN_TYPE value, \
-                                                   int64_t len, int32_t * out_len) {     \
-    if (len < 0) {                                                                       \
-      gdv_fn_context_set_error_msg(context, "Buffer length can not be negative");        \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    if (len == 0) {                                                                      \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    gandiva::GdvStringFormatter<arrow::ARROW_TYPE> formatter;                            \
-    char* ret = reinterpret_cast<char*>(                                                 \
-        gdv_fn_context_arena_malloc(context, static_cast<int32_t>(len)));                \
-    if (ret == nullptr) {                                                                \
-      gdv_fn_context_set_error_msg(context, "Could not allocate memory");                \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    arrow::Status status = formatter(value, [&](arrow::util::string_view v) {            \
-      int64_t size = static_cast<int64_t>(v.size());                                     \
-      *out_len = static_cast<int32_t>(len < size ? len : size);                          \
-      memcpy(ret, v.data(), *out_len);                                                   \
-      return arrow::Status::OK();                                                        \
-    });                                                                                  \
-    if (!status.ok()) {                                                                  \
-      std::string err = "Could not cast " + std::to_string(value) + " to string";        \
-      gdv_fn_context_set_error_msg(context, err.c_str());                                \
-      *out_len = 0;                                                                      \
-      return "";                                                                         \
-    }                                                                                    \
-    return ret;                                                                          \
+CAST_VARBINARY_FROM_STRING_AND_BINARY(utf8)
+CAST_VARBINARY_FROM_STRING_AND_BINARY(binary)
+
+#undef CAST_VARBINARY_FROM_STRING_AND_BINARY
+
+#define GDV_FN_CAST_VARCHAR_VARBINARY_INTEGER(IN_TYPE, CAST_NAME, ARROW_TYPE)     \
+  GANDIVA_EXPORT                                                                  \
+  const char* gdv_fn_cast##CAST_NAME##_##IN_TYPE##_int64(                         \
+      int64_t context, gdv_##IN_TYPE value, int64_t len, int32_t * out_len) {     \
+    if (len < 0) {                                                                \
+      gdv_fn_context_set_error_msg(context, "Buffer length can not be negative"); \
+      *out_len = 0;                                                               \
+      return "";                                                                  \
+    }                                                                             \
+    if (len == 0) {                                                               \
+      *out_len = 0;                                                               \
+      return "";                                                                  \
+    }                                                                             \
+    arrow::internal::StringFormatter<arrow::ARROW_TYPE> formatter;                \
+    char* ret = reinterpret_cast<char*>(                                          \
+        gdv_fn_context_arena_malloc(context, static_cast<int32_t>(len)));         \
+    if (ret == nullptr) {                                                         \
+      gdv_fn_context_set_error_msg(context, "Could not allocate memory");         \
+      *out_len = 0;                                                               \
+      return "";                                                                  \
+    }                                                                             \
+    arrow::Status status = formatter(value, [&](arrow::util::string_view v) {     \
+      int64_t size = static_cast<int64_t>(v.size());                              \
+      *out_len = static_cast<int32_t>(len < size ? len : size);                   \
+      memcpy(ret, v.data(), *out_len);                                            \
+      return arrow::Status::OK();                                                 \
+    });                                                                           \
+    if (!status.ok()) {                                                           \
+      std::string err = "Could not cast " + std::to_string(value) + " to string"; \
+      gdv_fn_context_set_error_msg(context, err.c_str());                         \
+      *out_len = 0;                                                               \
+      return "";                                                                  \
+    }                                                                             \
+    return ret;                                                                   \
   }
 
-GDV_FN_CAST_VARCHAR_INTEGER(int32, Int32Type)
-GDV_FN_CAST_VARCHAR_INTEGER(int64, Int64Type)
-GDV_FN_CAST_VARCHAR_REAL(float32, FloatType)
-GDV_FN_CAST_VARCHAR_REAL(float64, DoubleType)
+#define GDV_FN_CAST_VARCHAR_VARBINARY_REAL(IN_TYPE, CAST_NAME, ARROW_TYPE)        \
+  GANDIVA_EXPORT                                                                  \
+  const char* gdv_fn_cast##CAST_NAME##_##IN_TYPE##_int64(                         \
+      int64_t context, gdv_##IN_TYPE value, int64_t len, int32_t * out_len) {     \
+    if (len < 0) {                                                                \
+      gdv_fn_context_set_error_msg(context, "Buffer length can not be negative"); \
+      *out_len = 0;                                                               \
+      return "";                                                                  \
+    }                                                                             \
+    if (len == 0) {                                                               \
+      *out_len = 0;                                                               \
+      return "";                                                                  \
+    }                                                                             \
+    gandiva::GdvStringFormatter<arrow::ARROW_TYPE> formatter;                     \
+    char* ret = reinterpret_cast<char*>(                                          \
+        gdv_fn_context_arena_malloc(context, static_cast<int32_t>(len)));         \
+    if (ret == nullptr) {                                                         \
+      gdv_fn_context_set_error_msg(context, "Could not allocate memory");         \
+      *out_len = 0;                                                               \
+      return "";                                                                  \
+    }                                                                             \
+    arrow::Status status = formatter(value, [&](arrow::util::string_view v) {     \
+      int64_t size = static_cast<int64_t>(v.size());                              \
+      *out_len = static_cast<int32_t>(len < size ? len : size);                   \
+      memcpy(ret, v.data(), *out_len);                                            \
+      return arrow::Status::OK();                                                 \
+    });                                                                           \
+    if (!status.ok()) {                                                           \
+      std::string err = "Could not cast " + std::to_string(value) + " to string"; \
+      gdv_fn_context_set_error_msg(context, err.c_str());                         \
+      *out_len = 0;                                                               \
+      return "";                                                                  \
+    }                                                                             \
+    return ret;                                                                   \
+  }
 
-#undef GDV_FN_CAST_VARCHAR_INTEGER
-#undef GDV_FN_CAST_VARCHAR_REAL
+GDV_FN_CAST_VARCHAR_VARBINARY_INTEGER(int32, VARCHAR, Int32Type)

Review comment:
       Solved

##########
File path: cpp/src/gandiva/function_registry_string.cc
##########
@@ -236,6 +236,30 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
       NativeFunction("binary_string", {}, DataTypeVector{utf8()}, binary(),
                      kResultNullIfNull, "binary_string", NativeFunction::kNeedsContext),
 
+      NativeFunction("castVARBINARY", {}, DataTypeVector{binary(), int64()}, binary(),
+                     kResultNullIfNull, "gdv_fn_castVARBINARY_binary",

Review comment:
       Solved




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org