You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/04/06 17:21:42 UTC

[GitHub] [arrow] vvellanki commented on a diff in pull request #12803: ARROW-16134: [C++][GANDIVA] Fix Concat_WS errors return

vvellanki commented on code in PR #12803:
URL: https://github.com/apache/arrow/pull/12803#discussion_r844181958


##########
cpp/src/gandiva/precompiled/string_ops.cc:
##########
@@ -2393,152 +2393,239 @@ const char* byte_substr_binary_int32_int32(gdv_int64 context, const char* text,
   return ret;
 }
 
+FORCE_INLINE
+void concat_word(char* tmp, int* out_tmp, bool* last, const char* word, int word_len,
+                 bool word_validity) {
+  if (word_validity) {
+    memcpy(tmp + *out_tmp, word, word_len);
+    *out_tmp += word_len;
+    *last = true;
+  }
+}
+
+FORCE_INLINE
+void concat_separator(char* tmp, int* out_tmp, const char* separator, int separator_len,
+                      bool last, bool next) {
+  if (last && next) {
+    memcpy(tmp + *out_tmp, separator, separator_len);
+    *out_tmp += separator_len;
+  }
+}
+
 FORCE_INLINE
 const char* concat_ws_utf8_utf8(int64_t context, const char* separator,
-                                int32_t separator_len, const char* word1,
-                                int32_t word1_len, const char* word2, int32_t word2_len,
-                                int32_t* out_len) {
-  if (word1_len < 0 || word2_len < 0 || separator_len < 0) {
-    gdv_fn_context_set_error_msg(context, "All words can not be null.");
+                                int32_t separator_len, bool separator_validity,
+                                const char* word1, int32_t word1_len, bool word1_validity,
+                                const char* word2, int32_t word2_len, bool word2_validity,
+                                bool* out_valid, int32_t* out_len) {
+  *out_len = 0;
+  // If separator is null, always return null
+  if (!separator_validity) {
     *out_len = 0;
+    *out_valid = false;
     return "";
   }
+  *out_len += separator_len;
+  if (word1_validity) {
+    *out_len += word1_len;
+  }
+  if (word2_validity) {
+    *out_len += word2_len;
+  }
 
-  *out_len = word1_len + separator_len + word2_len;
   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
   if (out == nullptr) {
-    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
+    *out_valid = false;

Review Comment:
   Is this the expected behaviour when we are unable to allocate memory?



##########
cpp/src/gandiva/precompiled/string_ops.cc:
##########
@@ -2393,152 +2393,239 @@ const char* byte_substr_binary_int32_int32(gdv_int64 context, const char* text,
   return ret;
 }
 
+FORCE_INLINE
+void concat_word(char* tmp, int* out_tmp, bool* last, const char* word, int word_len,
+                 bool word_validity) {
+  if (word_validity) {
+    memcpy(tmp + *out_tmp, word, word_len);
+    *out_tmp += word_len;
+    *last = true;
+  }
+}
+
+FORCE_INLINE
+void concat_separator(char* tmp, int* out_tmp, const char* separator, int separator_len,
+                      bool last, bool next) {
+  if (last && next) {
+    memcpy(tmp + *out_tmp, separator, separator_len);
+    *out_tmp += separator_len;
+  }
+}
+
 FORCE_INLINE
 const char* concat_ws_utf8_utf8(int64_t context, const char* separator,
-                                int32_t separator_len, const char* word1,
-                                int32_t word1_len, const char* word2, int32_t word2_len,
-                                int32_t* out_len) {
-  if (word1_len < 0 || word2_len < 0 || separator_len < 0) {
-    gdv_fn_context_set_error_msg(context, "All words can not be null.");
+                                int32_t separator_len, bool separator_validity,
+                                const char* word1, int32_t word1_len, bool word1_validity,
+                                const char* word2, int32_t word2_len, bool word2_validity,
+                                bool* out_valid, int32_t* out_len) {
+  *out_len = 0;
+  // If separator is null, always return null
+  if (!separator_validity) {
     *out_len = 0;
+    *out_valid = false;
     return "";
   }
+  *out_len += separator_len;
+  if (word1_validity) {
+    *out_len += word1_len;
+  }
+  if (word2_validity) {
+    *out_len += word2_len;
+  }
 
-  *out_len = word1_len + separator_len + word2_len;
   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
   if (out == nullptr) {
-    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
+    *out_valid = false;
     return "";
   }
 
+  bool last = false;
+  bool next = false;
+
   char* tmp = out;
-  memcpy(tmp, word1, word1_len);
-  tmp += word1_len;
-  memcpy(tmp, separator, separator_len);
-  tmp += separator_len;
-  memcpy(tmp, word2, word2_len);
+  int out_tmp = 0;
+
+  concat_word(tmp, &out_tmp, &last, word1, word1_len, word1_validity);

Review Comment:
   The current helper functions take too many arguments and are confusing to read.



##########
cpp/src/gandiva/precompiled/string_ops.cc:
##########
@@ -2393,152 +2393,239 @@ const char* byte_substr_binary_int32_int32(gdv_int64 context, const char* text,
   return ret;
 }
 
+FORCE_INLINE
+void concat_word(char* tmp, int* out_tmp, bool* last, const char* word, int word_len,
+                 bool word_validity) {
+  if (word_validity) {
+    memcpy(tmp + *out_tmp, word, word_len);
+    *out_tmp += word_len;
+    *last = true;
+  }
+}
+
+FORCE_INLINE
+void concat_separator(char* tmp, int* out_tmp, const char* separator, int separator_len,
+                      bool last, bool next) {
+  if (last && next) {
+    memcpy(tmp + *out_tmp, separator, separator_len);
+    *out_tmp += separator_len;
+  }
+}
+
 FORCE_INLINE
 const char* concat_ws_utf8_utf8(int64_t context, const char* separator,
-                                int32_t separator_len, const char* word1,
-                                int32_t word1_len, const char* word2, int32_t word2_len,
-                                int32_t* out_len) {
-  if (word1_len < 0 || word2_len < 0 || separator_len < 0) {
-    gdv_fn_context_set_error_msg(context, "All words can not be null.");
+                                int32_t separator_len, bool separator_validity,
+                                const char* word1, int32_t word1_len, bool word1_validity,
+                                const char* word2, int32_t word2_len, bool word2_validity,
+                                bool* out_valid, int32_t* out_len) {
+  *out_len = 0;
+  // If separator is null, always return null
+  if (!separator_validity) {
     *out_len = 0;
+    *out_valid = false;
     return "";
   }
+  *out_len += separator_len;
+  if (word1_validity) {
+    *out_len += word1_len;
+  }
+  if (word2_validity) {
+    *out_len += word2_len;
+  }
 
-  *out_len = word1_len + separator_len + word2_len;
   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
   if (out == nullptr) {
-    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
+    *out_valid = false;
     return "";
   }
 
+  bool last = false;
+  bool next = false;
+
   char* tmp = out;
-  memcpy(tmp, word1, word1_len);
-  tmp += word1_len;
-  memcpy(tmp, separator, separator_len);
-  tmp += separator_len;
-  memcpy(tmp, word2, word2_len);
+  int out_tmp = 0;
+
+  concat_word(tmp, &out_tmp, &last, word1, word1_len, word1_validity);

Review Comment:
   Wouldn't something like the following simplify the code?
   concat_word(char *out_buf, int *out_idx, char *in_buf, int in_len, bool in_validity, char *separator) {
     if (!in_validity) { return; }
   
     // input is valid
     if (*out_idx != 0) {
       // copy the separator and update *out_idx
     }
     // copy the input and update *out_idx
   }
   
   If you have this function, you can call it once for each argument. Isn't this correct?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org