You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/06/19 14:19:47 UTC

[GitHub] [arrow] pravindra commented on a change in pull request #7402: ARROW-9099: [C++][Gandiva] Implement trim function for string

pravindra commented on a change in pull request #7402:
URL: https://github.com/apache/arrow/pull/7402#discussion_r442867225



##########
File path: cpp/src/gandiva/precompiled/string_ops.cc
##########
@@ -284,6 +284,49 @@ const char* reverse_utf8(gdv_int64 context, const char* data, gdv_int32 data_len
   return ret;
 }
 
+// Trim a utf8 sequence
+FORCE_INLINE
+const char* trim_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
+                      int32_t* out_len) {
+  if (data_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  gdv_int32 start = 0, end = data_len - 1;
+  // start and end denote the first and last positions of non-space
+  // characters in the input string respectively
+  while (start <= end && data[start] == ' ') {
+    ++start;
+  }
+  while (end >= start && data[end] == ' ') {
+    --end;
+  }
+
+  // string with no leading/trailing spaces, return original string
+  if (start == 0 && end == data_len - 1) {
+    *out_len = data_len;
+    return data;
+  }
+
+  // string with all spaces
+  if (start > end) {
+    *out_len = 0;
+    return "";
+  }
+
+  gdv_int32 length = end - start + 1;
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, length));

Review comment:
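       Why malloc and copy? The trimmed result is a contiguous sub-range of the input buffer, so no new allocation is needed; you could simply do: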
   
   *out_len = length;
   return data + start;




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org