You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/10/31 00:46:52 UTC
[doris] branch branch-1.1-lts updated: [Bug](Vectorized)fix json_object and json_array function return wrong result on vectorized engine (#13729)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-1.1-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.1-lts by this push:
new afebb82066 [Bug](Vectorized)fix json_object and json_array function return wrong result on vectorized engine (#13729)
afebb82066 is described below
commit afebb82066eac5439ac08afd24d8d57c1b285f8e
Author: ChPi <ch...@gmail.com>
AuthorDate: Mon Oct 31 08:46:46 2022 +0800
[Bug](Vectorized)fix json_object and json_array function return wrong result on vectorized engine (#13729)
* [Bug][Vectorized] Fix json_object and json_array functions returning wrong results on the vectorized engine
Co-authored-by: chenjie <ch...@cecdat.com>
---
be/src/vec/functions/function_json.cpp | 92 ++++++++++++++++++----
gensrc/script/doris_builtins_functions.py | 4 +-
.../json_function/test_query_json_array.out | 15 ++++
.../json_function/test_query_json_object.out | 20 +++--
..._object.groovy => test_query_json_array.groovy} | 31 +++++---
.../json_function/test_query_json_object.groovy | 27 ++++---
6 files changed, 141 insertions(+), 48 deletions(-)
diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp
index 2019ac5910..1c88ecc902 100644
--- a/be/src/vec/functions/function_json.cpp
+++ b/be/src/vec/functions/function_json.cpp
@@ -415,22 +415,28 @@ struct FunctionJsonArrayImpl {
static void execute_parse(const std::string& type_flags,
const std::vector<const ColumnString*>& data_columns,
std::vector<rapidjson::Value>& objects,
- rapidjson::Document::AllocatorType& allocator) {
+ rapidjson::Document::AllocatorType& allocator,
+ const std::vector<const ColumnUInt8*>& nullmaps) {
for (int i = 0; i < data_columns.size() - 1; i++) {
constexpr_loop_match<'0', '6', JsonParser>(type_flags[i], objects, allocator,
- data_columns[i]);
+ data_columns[i], nullmaps[i]);
}
}
template <typename TypeImpl>
static void execute_type(std::vector<rapidjson::Value>& objects,
rapidjson::Document::AllocatorType& allocator,
- const ColumnString* data_column) {
+ const ColumnString* data_column, const ColumnUInt8* nullmap) {
StringParser::ParseResult result;
rapidjson::Value value;
for (int i = 0; i < objects.size(); i++) {
- TypeImpl::update_value(result, value, data_column->get_data_at(i), allocator);
+ if (nullmap != nullptr && nullmap->get_data()[i]) {
+ JsonParser<'0'>::update_value(result, value, data_column->get_data_at(i),
+ allocator);
+ } else {
+ TypeImpl::update_value(result, value, data_column->get_data_at(i), allocator);
+ }
objects[i].PushBack(value, allocator);
}
}
@@ -444,40 +450,96 @@ struct FunctionJsonObjectImpl {
static void execute_parse(std::string type_flags,
const std::vector<const ColumnString*>& data_columns,
std::vector<rapidjson::Value>& objects,
- rapidjson::Document::AllocatorType& allocator) {
+ rapidjson::Document::AllocatorType& allocator,
+ const std::vector<const ColumnUInt8*>& nullmaps) {
for (auto& array_object : objects) {
array_object.SetObject();
}
-
for (int i = 0; i + 1 < data_columns.size() - 1; i += 2) {
constexpr_loop_match<'0', '6', JsonParser>(type_flags[i + 1], objects, allocator,
- data_columns[i], data_columns[i + 1]);
+ data_columns[i], data_columns[i + 1],
+ nullmaps[i + 1]);
}
}
template <typename TypeImpl>
static void execute_type(std::vector<rapidjson::Value>& objects,
rapidjson::Document::AllocatorType& allocator,
- const ColumnString* key_column, const ColumnString* value_column) {
+ const ColumnString* key_column, const ColumnString* value_column,
+ const ColumnUInt8* nullmap) {
StringParser::ParseResult result;
rapidjson::Value key;
rapidjson::Value value;
-
for (int i = 0; i < objects.size(); i++) {
JsonParser<'4'>::update_value(result, key, key_column->get_data_at(i),
allocator); // key always is string
- TypeImpl::update_value(result, value, value_column->get_data_at(i), allocator);
+ if (nullmap != nullptr && nullmap->get_data()[i]) {
+ JsonParser<'0'>::update_value(result, value, value_column->get_data_at(i),
+ allocator);
+ } else {
+ TypeImpl::update_value(result, value, value_column->get_data_at(i), allocator);
+ }
objects[i].AddMember(key, value, allocator);
}
}
};
template <typename SpecificImpl>
-struct FunctionJsonImpl {
+class FunctionJsonAlwaysNotNullable : public IFunction {
+public:
static constexpr auto name = SpecificImpl::name;
+ static FunctionPtr create() {
+ return std::make_shared<FunctionJsonAlwaysNotNullable<SpecificImpl>>();
+ }
+
+ bool use_default_implementation_for_nulls() const override { return false; }
+
+ String get_name() const override { return name; }
+
+ size_t get_number_of_arguments() const override { return 0; }
+
+ bool is_variadic() const override { return true; }
+
+ bool use_default_implementation_for_constants() const override { return true; }
+
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+ return std::make_shared<DataTypeString>();
+ }
+
+ Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+ size_t result, size_t input_rows_count) override {
+ auto result_column = ColumnString::create();
+
+ std::vector<ColumnPtr> column_ptrs; // prevent converted column destruct
+ std::vector<const ColumnString*> data_columns;
+ std::vector<const ColumnUInt8*> nullmaps;
+ for (int i = 0; i < arguments.size(); i++) {
+ auto column = block.get_by_position(arguments[i]).column;
+ column_ptrs.push_back(column->convert_to_full_column_if_const());
+ const ColumnNullable* col_nullable =
+ check_and_get_column<ColumnNullable>(column_ptrs.back().get());
+ if (col_nullable) {
+ const ColumnUInt8* col_nullmap = check_and_get_column<ColumnUInt8>(
+ col_nullable->get_null_map_column_ptr().get());
+ nullmaps.push_back(col_nullmap);
+ const ColumnString* col = check_and_get_column<ColumnString>(
+ col_nullable->get_nested_column_ptr().get());
+ data_columns.push_back(col);
+ } else {
+ nullmaps.push_back(nullptr);
+ data_columns.push_back(assert_cast<const ColumnString*>(column_ptrs.back().get()));
+ }
+ }
+ execute(data_columns, *assert_cast<ColumnString*>(result_column.get()), input_rows_count,
+ nullmaps);
+ block.get_by_position(result).column = std::move(result_column);
+ return Status::OK();
+ }
+
static void execute(const std::vector<const ColumnString*>& data_columns,
- ColumnString& result_column, size_t input_rows_count) {
+ ColumnString& result_column, size_t input_rows_count,
+ const std::vector<const ColumnUInt8*> nullmaps) {
std::string type_flags = data_columns.back()->get_data_at(0).to_string();
rapidjson::Document document;
@@ -488,7 +550,7 @@ struct FunctionJsonImpl {
objects.emplace_back(rapidjson::kArrayType);
}
- SpecificImpl::execute_parse(type_flags, data_columns, objects, allocator);
+ SpecificImpl::execute_parse(type_flags, data_columns, objects, allocator, nullmaps);
rapidjson::StringBuffer buf;
rapidjson::Writer<rapidjson::StringBuffer> writer(buf);
@@ -572,8 +634,8 @@ void register_function_json(SimpleFunctionFactory& factory) {
factory.register_function<FunctionGetJsonDouble>();
factory.register_function<FunctionGetJsonString>();
- factory.register_function<FunctionJson<FunctionJsonImpl<FunctionJsonArrayImpl>>>();
- factory.register_function<FunctionJson<FunctionJsonImpl<FunctionJsonObjectImpl>>>();
+ factory.register_function<FunctionJsonAlwaysNotNullable<FunctionJsonArrayImpl>>();
+ factory.register_function<FunctionJsonAlwaysNotNullable<FunctionJsonObjectImpl>>();
factory.register_function<FunctionJson<FunctionJsonQuoteImpl>>();
}
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index b758c00ead..ab0353f826 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1111,10 +1111,10 @@ visible_functions = [
[['json_array'], 'VARCHAR', ['VARCHAR', '...'],
'_ZN5doris13JsonFunctions10json_arrayEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
- '', '', 'vec', ''],
+ '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['json_object'], 'VARCHAR', ['VARCHAR', '...'],
'_ZN5doris13JsonFunctions11json_objectEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
- '', '', 'vec', ''],
+ '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['json_quote'], 'VARCHAR', ['VARCHAR'],
'_ZN5doris13JsonFunctions10json_quoteEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', ''],
diff --git a/regression-test/data/query/sql_functions/json_function/test_query_json_array.out b/regression-test/data/query/sql_functions/json_function/test_query_json_array.out
new file mode 100644
index 0000000000..bc6c2d0e7f
--- /dev/null
+++ b/regression-test/data/query/sql_functions/json_function/test_query_json_array.out
@@ -0,0 +1,15 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql1 --
+["k0",1,"k1",null,"k2",null,"k3",null,"k4",null,"k5",null,"k6","k6"]
+["k0",2,"k1",1,"k2",null,"k3",null,"k4",null,"k5",null,"k6","k6"]
+["k0",3,"k1",null,"k2",true,"k3",null,"k4",null,"k5",null,"k6","k6"]
+["k0",4,"k1",null,"k2",null,"k3","test","k4","2022-01-01 11:11:11","k5",null,"k6","k6"]
+["k0",5,"k1",1,"k2",true,"k3","test","k4","2022-01-01 11:11:11","k5",null,"k6","k6"]
+
+-- !sql2 --
+["k0",1,"k1",null,"k2",null,"k3",null,"k4",null,"k5",null,"k6","k6"]
+["k0",2,"k1",1,"k2",null,"k3",null,"k4",null,"k5",null,"k6","k6"]
+["k0",3,"k1",null,"k2",true,"k3",null,"k4",null,"k5",null,"k6","k6"]
+["k0",4,"k1",null,"k2",null,"k3","test","k4","2022-01-01 11:11:11","k5",null,"k6","k6"]
+["k0",5,"k1",1,"k2",true,"k3","test","k4","2022-01-01 11:11:11","k5",null,"k6","k6"]
+
diff --git a/regression-test/data/query/sql_functions/json_function/test_query_json_object.out b/regression-test/data/query/sql_functions/json_function/test_query_json_object.out
index ee1dd4abd8..f270e8484e 100644
--- a/regression-test/data/query/sql_functions/json_function/test_query_json_object.out
+++ b/regression-test/data/query/sql_functions/json_function/test_query_json_object.out
@@ -1,9 +1,15 @@
-- This file is automatically generated. You should know what you did if you want to edit this
--- !sql --
-{"k1":null}
-{"k1":null}
-{"k1":null}
-{"k1":null}
-{"k1":null}
-{"k1":1}
+-- !sql1 --
+{"k0":1,"k1":null,"k2":null,"k3":null,"k4":null,"k5":null,"k6":"k6"}
+{"k0":2,"k1":1,"k2":null,"k3":null,"k4":null,"k5":null,"k6":"k6"}
+{"k0":3,"k1":null,"k2":true,"k3":null,"k4":null,"k5":null,"k6":"k6"}
+{"k0":4,"k1":null,"k2":null,"k3":"test","k4":"2022-01-01 11:11:11","k5":null,"k6":"k6"}
+{"k0":5,"k1":1,"k2":true,"k3":"test","k4":"2022-01-01 11:11:11","k5":null,"k6":"k6"}
+
+-- !sql2 --
+{"k0":1,"k1":null,"k2":null,"k3":null,"k4":null,"k5":null,"k6":"k6"}
+{"k0":2,"k1":1,"k2":null,"k3":null,"k4":null,"k5":null,"k6":"k6"}
+{"k0":3,"k1":null,"k2":true,"k3":null,"k4":null,"k5":null,"k6":"k6"}
+{"k0":4,"k1":null,"k2":null,"k3":"test","k4":"2022-01-01 11:11:11","k5":null,"k6":"k6"}
+{"k0":5,"k1":1,"k2":true,"k3":"test","k4":"2022-01-01 11:11:11","k5":null,"k6":"k6"}
diff --git a/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy b/regression-test/suites/query/sql_functions/json_function/test_query_json_array.groovy
similarity index 53%
copy from regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy
copy to regression-test/suites/query/sql_functions/json_function/test_query_json_array.groovy
index 924bffcc75..6a4ef939a3 100644
--- a/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy
+++ b/regression-test/suites/query/sql_functions/json_function/test_query_json_array.groovy
@@ -15,29 +15,34 @@
// specific language governing permissions and limitations
// under the License.
-suite("test_query_json_object", "query") {
+suite("test_query_json_array", "query") {
sql "set enable_vectorized_engine = false;"
- def tableName = "test_query_json_object"
+ def tableName = "test_query_json_array"
sql "DROP TABLE IF EXISTS ${tableName}"
sql """
- CREATE TABLE `${tableName}` (
- `k1` int(11) NULL COMMENT "user id"
+ CREATE TABLE ${tableName} (
+ `k0` int(11) not null,
+ `k1` int(11) NULL,
+ `k2` boolean NULL,
+ `k3` varchar(255),
+ `k4` datetime
) ENGINE=OLAP
- DUPLICATE KEY(`k1`)
+ DUPLICATE KEY(`k0`,`k1`,`k2`,`k3`,`k4`)
COMMENT "OLAP"
- DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+ DISTRIBUTED BY HASH(`k0`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
);
"""
- sql "insert into ${tableName} values(null);"
- sql "insert into ${tableName} values(null);"
- sql "insert into ${tableName} values(null);"
- sql "insert into ${tableName} values(null);"
- sql "insert into ${tableName} values(null);"
- sql "insert into ${tableName} values(1);"
- qt_sql "select json_object(\"k1\",k1) from ${tableName};"
+ sql "insert into ${tableName} values(1,null,null,null,null);"
+ sql "insert into ${tableName} values(2,1,null,null,null);"
+ sql "insert into ${tableName} values(3,null,true,null,null);"
+ sql "insert into ${tableName} values(4,null,null,'test','2022-01-01 11:11:11');"
+ sql "insert into ${tableName} values(5,1,true,'test','2022-01-01 11:11:11');"
+ qt_sql1 "select json_array('k0',k0,'k1',k1,'k2',k2,'k3',k3,'k4',k4,'k5', null,'k6','k6') from ${tableName};"
+ sql "set enable_vectorized_engine = true;"
+ qt_sql2 "select json_array('k0',k0,'k1',k1,'k2',k2,'k3',k3,'k4',k4,'k5', null,'k6','k6') from ${tableName};"
sql "DROP TABLE ${tableName};"
}
diff --git a/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy b/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy
index 924bffcc75..1f35bdf151 100644
--- a/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy
+++ b/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy
@@ -20,24 +20,29 @@ suite("test_query_json_object", "query") {
def tableName = "test_query_json_object"
sql "DROP TABLE IF EXISTS ${tableName}"
sql """
- CREATE TABLE `${tableName}` (
- `k1` int(11) NULL COMMENT "user id"
+ CREATE TABLE ${tableName} (
+ `k0` int(11) not null,
+ `k1` int(11) NULL,
+ `k2` boolean NULL,
+ `k3` varchar(255),
+ `k4` datetime
) ENGINE=OLAP
- DUPLICATE KEY(`k1`)
+ DUPLICATE KEY(`k0`,`k1`,`k2`,`k3`,`k4`)
COMMENT "OLAP"
- DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+ DISTRIBUTED BY HASH(`k0`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
);
"""
- sql "insert into ${tableName} values(null);"
- sql "insert into ${tableName} values(null);"
- sql "insert into ${tableName} values(null);"
- sql "insert into ${tableName} values(null);"
- sql "insert into ${tableName} values(null);"
- sql "insert into ${tableName} values(1);"
- qt_sql "select json_object(\"k1\",k1) from ${tableName};"
+ sql "insert into ${tableName} values(1,null,null,null,null);"
+ sql "insert into ${tableName} values(2,1,null,null,null);"
+ sql "insert into ${tableName} values(3,null,true,null,null);"
+ sql "insert into ${tableName} values(4,null,null,'test','2022-01-01 11:11:11');"
+ sql "insert into ${tableName} values(5,1,true,'test','2022-01-01 11:11:11');"
+ qt_sql1 "select json_object('k0',k0,'k1',k1,'k2',k2,'k3',k3,'k4',k4,'k5', null,'k6','k6') from ${tableName};"
+ sql "set enable_vectorized_engine = true;"
+ qt_sql2 "select json_object('k0',k0,'k1',k1,'k2',k2,'k3',k3,'k4',k4,'k5', null,'k6','k6') from ${tableName};"
sql "DROP TABLE ${tableName};"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org