You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pr...@apache.org on 2019/08/09 11:32:44 UTC

[arrow] branch master updated: ARROW-6162: [C++][Gandiva] Do not truncate string in castVARCHAR_utf8 if output length is zero

This is an automated email from the ASF dual-hosted git repository.

praveenbingo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 0d1c7ec  ARROW-6162: [C++][Gandiva] Do not truncate string in castVARCHAR_utf8 if output length is zero
0d1c7ec is described below

commit 0d1c7ec855560594a815f3a16561472dd7f18c6c
Author: Prudhvi Porandla <pr...@icloud.com>
AuthorDate: Fri Aug 9 17:02:22 2019 +0530

    ARROW-6162: [C++][Gandiva] Do not truncate string in castVARCHAR_utf8 if output length is zero
    
    Do not truncate the string when the length parameter is 0 in the castVARCHAR_utf8_int64 function; a length of 0 now returns the full input string.
    
    Closes #5040 from pprudhvi/castutf8utf8 and squashes the following commits:
    
    3498a864b <Prudhvi Porandla> add unittest
    234fa5bf1 <Prudhvi Porandla> return full string if out_len is 0
    
    Authored-by: Prudhvi Porandla <pr...@icloud.com>
    Signed-off-by: Praveen <pr...@dremio.com>
---
 cpp/src/gandiva/precompiled/string_ops.cc      |  5 +++--
 cpp/src/gandiva/precompiled/string_ops_test.cc | 20 ++++++++++++++++++++
 cpp/src/gandiva/precompiled/types.h            |  3 +++
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index 09675af..31b7eed 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -165,8 +165,9 @@ FORCE_INLINE
 char* castVARCHAR_utf8_int64(int64 context, const char* data, int32 data_len,
                              int64_t out_len, int32_t* out_length) {
   // TODO: handle allocation failures
-  int32_t len = data_len <= static_cast<int32_t>(out_len) ? data_len
-                                                          : static_cast<int32_t>(out_len);
+  int32_t len = data_len <= static_cast<int32_t>(out_len) || out_len == 0
+                    ? data_len
+                    : static_cast<int32_t>(out_len);
   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, len));
   memcpy(ret, data, len);
   *out_length = len;
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index f34de68..8a644a4 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -18,6 +18,7 @@
 #include <gtest/gtest.h>
 #include "gandiva/execution_context.h"
 #include "gandiva/precompiled/types.h"
+
 namespace gandiva {
 
 TEST(TestStringOps, TestCompare) {
@@ -74,6 +75,25 @@ TEST(TestStringOps, TestCharLength) {
       << ctx.get_error();
 }
 
+TEST(TestStringOps, TestCastVarhcar) {  // NOTE(review): "Varhcar" misspells "Varchar"
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<int64>(&ctx);  // ctx address passed as an integer
+  int32 out_len = 0;  // receives the length of each result
+  // a limit (1) shorter than the 4-byte input cuts the result to the first byte
+  char* out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 1, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "a");
+  EXPECT_FALSE(ctx.has_error());
+  // a limit (6) longer than the input returns the whole input
+  out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 6, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "asdf");
+  EXPECT_FALSE(ctx.has_error());
+
+  // a limit of 0 means "do not truncate": the whole input is returned
+  out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "asdf");
+  EXPECT_FALSE(ctx.has_error());
+}
+
 TEST(TestStringOps, TestSubstring) {
   gandiva::ExecutionContext ctx;
   uint64_t ctx_ptr = reinterpret_cast<int64>(&ctx);
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 2332db4..2935ecf 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -174,6 +174,9 @@ const char* substr_utf8_int64(int64 context, const char* input, int32 in_len,
                               int64 offset64, int32* out_len);
 const char* concatOperator_utf8_utf8(int64 context, const char* left, int32 left_len,
                                      const char* right, int32 right_len, int32* out_len);
+// Copies the first out_len bytes of data (all of it if out_len is 0 or >= data_len).
+char* castVARCHAR_utf8_int64(int64 context, const char* data, int32 data_len,
+                             int64_t out_len, int32_t* out_length);
 }  // extern "C"
 
 #endif  // PRECOMPILED_TYPES_H