You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/02/11 07:09:19 UTC

[impala] 02/02: IMPALA-955: BYTES built-in function

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit bde995483a1b6e91dc5d089dfc07225a93d7c8ca
Author: pranav.lodha <pr...@cloudera.com>
AuthorDate: Thu Feb 3 10:04:51 2022 +0530

    IMPALA-955: BYTES built-in function
    
    The Bytes function returns the number of bytes contained
    in the specified byte string. The change spans 5 files,
    including new test cases in be/src/exprs/expr-test.cc and
    an end-to-end test in
    testdata/workloads/functional-query/queries/QueryTest/exprs.test.
    
    Change-Id: I0bd06c3d6dba354d71f63c649eaa8f9f74d266ee
    Reviewed-on: http://gerrit.cloudera.org:8080/18210
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exprs/expr-test.cc                                 |  8 ++++++++
 be/src/exprs/string-functions-ir.cc                       |  4 ++++
 be/src/exprs/string-functions.h                           |  1 +
 common/function-registry/impala_functions.py              |  1 +
 .../functional-query/queries/QueryTest/exprs.test         | 15 +++++++++++++++
 5 files changed, 29 insertions(+)

diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index a7c0aa7..62b59ac 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -10705,6 +10705,14 @@ TEST_P(ExprTest, Utf8MaskTest) {
   executor_->PopExecOption();
 }
 
+TEST_P(ExprTest, BytesTest) {
+  // Bytes(exp) counts bytes, not characters: '你' and '好' take 3 bytes each.
+  TestValue("Bytes('hello')", TYPE_INT, 5);
+  TestValue("Bytes('你好')", TYPE_INT, 6);
+  TestValue("Bytes('你好hello')", TYPE_INT, 11);
+  TestValue("Bytes('你好 hello 你好')", TYPE_INT, 19);
+  TestIsNull("Bytes(NULL)", TYPE_INT);
+}
 TEST_P(ExprTest, Utf8Test) {
   // Verifies utf8_length() counts length by UTF-8 characters instead of bytes.
   // '你' and '好' are both encoded into 3 bytes.
diff --git a/be/src/exprs/string-functions-ir.cc b/be/src/exprs/string-functions-ir.cc
index 9e4dc7d..f8a1a61 100644
--- a/be/src/exprs/string-functions-ir.cc
+++ b/be/src/exprs/string-functions-ir.cc
@@ -258,6 +258,10 @@ IntVal StringFunctions::Length(FunctionContext* context, const StringVal& str) {
   }
   return IntVal(str.len);
 }
+IntVal StringFunctions::Bytes(FunctionContext* context, const StringVal& str) {
+  // NULL input yields NULL; otherwise return str.len unchanged (no UTF-8 decoding).
+  return str.is_null ? IntVal::null() : IntVal(str.len);
+}
 
 IntVal StringFunctions::CharLength(FunctionContext* context, const StringVal& str) {
   if (str.is_null) return IntVal::null();
diff --git a/be/src/exprs/string-functions.h b/be/src/exprs/string-functions.h
index aa0544a..b9a2248 100644
--- a/be/src/exprs/string-functions.h
+++ b/be/src/exprs/string-functions.h
@@ -72,6 +72,7 @@ class StringFunctions {
       const StringVal& pad);
   static StringVal Rpad(FunctionContext*, const StringVal& str, const BigIntVal&,
       const StringVal& pad);
+  static IntVal Bytes(FunctionContext*, const StringVal& str);
   static IntVal Length(FunctionContext*, const StringVal& str);
   static IntVal CharLength(FunctionContext*, const StringVal& str);
   static IntVal Utf8Length(FunctionContext*, const StringVal& str);
diff --git a/common/function-registry/impala_functions.py b/common/function-registry/impala_functions.py
index dc01349..ee2a9a5 100644
--- a/common/function-registry/impala_functions.py
+++ b/common/function-registry/impala_functions.py
@@ -507,6 +507,7 @@ visible_functions = [
   [['repeat'], 'STRING', ['STRING', 'BIGINT'], 'impala::StringFunctions::Repeat'],
   [['lpad'], 'STRING', ['STRING', 'BIGINT', 'STRING'], 'impala::StringFunctions::Lpad'],
   [['rpad'], 'STRING', ['STRING', 'BIGINT', 'STRING'], 'impala::StringFunctions::Rpad'],
+  [['bytes'], 'INT', ['STRING'], 'impala::StringFunctions::Bytes'],
   [['length'], 'INT', ['STRING'], 'impala::StringFunctions::Length'],
   [['length'], 'INT', ['CHAR'], 'impala::StringFunctions::CharLength'],
   [['char_length'], 'INT', ['STRING'], 'impala::StringFunctions::Length'],
diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
index 0bb674a..cff4f15 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
@@ -3200,4 +3200,19 @@ select 'escape' like 'escap_'
 true
 ---- TYPES
 BOOLEAN
+====
+---- QUERY: IMPALA-955
+# Returns number of bytes in a byte string.
+select bytes(string_col), bytes(date_string_col) from functional.alltypestiny;
+---- RESULTS
+1,8
+1,8
+1,8
+1,8
+1,8
+1,8
+1,8
+1,8
+---- TYPES
+INT, INT
 ====
\ No newline at end of file