You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/02/11 07:09:19 UTC
[impala] 02/02: IMPALA-955: BYTES built-in function
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit bde995483a1b6e91dc5d089dfc07225a93d7c8ca
Author: pranav.lodha <pr...@cloudera.com>
AuthorDate: Thu Feb 3 10:04:51 2022 +0530
IMPALA-955: BYTES built-in function
The BYTES function returns the number of bytes contained
in the specified byte string. This change touches
5 files. A few test cases are added in
be/src/exprs/expr-test.cc, along with an end-to-end test in
testdata/workloads/functional-query/queries/QueryTest/exprs.test.
Change-Id: I0bd06c3d6dba354d71f63c649eaa8f9f74d266ee
Reviewed-on: http://gerrit.cloudera.org:8080/18210
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/exprs/expr-test.cc | 8 ++++++++
be/src/exprs/string-functions-ir.cc | 4 ++++
be/src/exprs/string-functions.h | 1 +
common/function-registry/impala_functions.py | 1 +
.../functional-query/queries/QueryTest/exprs.test | 15 +++++++++++++++
5 files changed, 29 insertions(+)
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index a7c0aa7..62b59ac 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -10705,6 +10705,14 @@ TEST_P(ExprTest, Utf8MaskTest) {
executor_->PopExecOption();
}
+TEST_P(ExprTest, BytesTest) {
+ // Verifies Bytes(expr) counts bytes, not characters: '你' and '好' are 3 bytes each.
+ TestIsNull("Bytes(NULL)", TYPE_INT);
+ TestValue("Bytes('你好')", TYPE_INT, 6);
+ TestValue("Bytes('你好hello')", TYPE_INT, 11);
+ TestValue("Bytes('你好 hello 你好')", TYPE_INT, 19);
+ TestValue("Bytes('hello')", TYPE_INT, 5);
+}
TEST_P(ExprTest, Utf8Test) {
// Verifies utf8_length() counts length by UTF-8 characters instead of bytes.
// '你' and '好' are both encoded into 3 bytes.
diff --git a/be/src/exprs/string-functions-ir.cc b/be/src/exprs/string-functions-ir.cc
index 9e4dc7d..f8a1a61 100644
--- a/be/src/exprs/string-functions-ir.cc
+++ b/be/src/exprs/string-functions-ir.cc
@@ -258,6 +258,10 @@ IntVal StringFunctions::Length(FunctionContext* context, const StringVal& str) {
}
return IntVal(str.len);
}
+// Returns the length of 'str' in bytes, or NULL when 'str' itself is NULL.
+IntVal StringFunctions::Bytes(FunctionContext* context, const StringVal& str) {
+ return str.is_null ? IntVal::null() : IntVal(str.len);
+}
IntVal StringFunctions::CharLength(FunctionContext* context, const StringVal& str) {
if (str.is_null) return IntVal::null();
diff --git a/be/src/exprs/string-functions.h b/be/src/exprs/string-functions.h
index aa0544a..b9a2248 100644
--- a/be/src/exprs/string-functions.h
+++ b/be/src/exprs/string-functions.h
@@ -72,6 +72,7 @@ class StringFunctions {
const StringVal& pad);
static StringVal Rpad(FunctionContext*, const StringVal& str, const BigIntVal&,
const StringVal& pad);
+ static IntVal Bytes(FunctionContext*, const StringVal& str);
static IntVal Length(FunctionContext*, const StringVal& str);
static IntVal CharLength(FunctionContext*, const StringVal& str);
static IntVal Utf8Length(FunctionContext*, const StringVal& str);
diff --git a/common/function-registry/impala_functions.py b/common/function-registry/impala_functions.py
index dc01349..ee2a9a5 100644
--- a/common/function-registry/impala_functions.py
+++ b/common/function-registry/impala_functions.py
@@ -507,6 +507,7 @@ visible_functions = [
[['repeat'], 'STRING', ['STRING', 'BIGINT'], 'impala::StringFunctions::Repeat'],
[['lpad'], 'STRING', ['STRING', 'BIGINT', 'STRING'], 'impala::StringFunctions::Lpad'],
[['rpad'], 'STRING', ['STRING', 'BIGINT', 'STRING'], 'impala::StringFunctions::Rpad'],
+ [['bytes'], 'INT', ['STRING'], 'impala::StringFunctions::Bytes'],
[['length'], 'INT', ['STRING'], 'impala::StringFunctions::Length'],
[['length'], 'INT', ['CHAR'], 'impala::StringFunctions::CharLength'],
[['char_length'], 'INT', ['STRING'], 'impala::StringFunctions::Length'],
diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
index 0bb674a..cff4f15 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
@@ -3200,4 +3200,19 @@ select 'escape' like 'escap_'
true
---- TYPES
BOOLEAN
+====
+---- QUERY: IMPALA-955
+# Returns number of bytes in a byte string.
+select bytes(string_col), bytes(date_string_col) from functional.alltypestiny;
+---- RESULTS
+1,8
+1,8
+1,8
+1,8
+1,8
+1,8
+1,8
+1,8
+---- TYPES
+INT, INT
====
\ No newline at end of file