You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/10/31 00:46:52 UTC

[doris] branch branch-1.1-lts updated: [Bug](Vectorized)fix json_object and json_array function return wrong result on vectorized engine (#13729)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-1.1-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.1-lts by this push:
     new afebb82066 [Bug](Vectorized)fix json_object and json_array function return wrong result on vectorized engine (#13729)
afebb82066 is described below

commit afebb82066eac5439ac08afd24d8d57c1b285f8e
Author: ChPi <ch...@gmail.com>
AuthorDate: Mon Oct 31 08:46:46 2022 +0800

    [Bug](Vectorized)fix json_object and json_array function return wrong result on vectorized engine (#13729)
    
    * [Bug][Vectorized] Fix json_object and json_array functions returning wrong results on the vectorized engine
    
    Co-authored-by: chenjie <ch...@cecdat.com>
---
 be/src/vec/functions/function_json.cpp             | 92 ++++++++++++++++++----
 gensrc/script/doris_builtins_functions.py          |  4 +-
 .../json_function/test_query_json_array.out        | 15 ++++
 .../json_function/test_query_json_object.out       | 20 +++--
 ..._object.groovy => test_query_json_array.groovy} | 31 +++++---
 .../json_function/test_query_json_object.groovy    | 27 ++++---
 6 files changed, 141 insertions(+), 48 deletions(-)

diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp
index 2019ac5910..1c88ecc902 100644
--- a/be/src/vec/functions/function_json.cpp
+++ b/be/src/vec/functions/function_json.cpp
@@ -415,22 +415,28 @@ struct FunctionJsonArrayImpl {
     static void execute_parse(const std::string& type_flags,
                               const std::vector<const ColumnString*>& data_columns,
                               std::vector<rapidjson::Value>& objects,
-                              rapidjson::Document::AllocatorType& allocator) {
+                              rapidjson::Document::AllocatorType& allocator,
+                              const std::vector<const ColumnUInt8*>& nullmaps) {
         for (int i = 0; i < data_columns.size() - 1; i++) {
             constexpr_loop_match<'0', '6', JsonParser>(type_flags[i], objects, allocator,
-                                                       data_columns[i]);
+                                                       data_columns[i], nullmaps[i]);
         }
     }
 
     template <typename TypeImpl>
     static void execute_type(std::vector<rapidjson::Value>& objects,
                              rapidjson::Document::AllocatorType& allocator,
-                             const ColumnString* data_column) {
+                             const ColumnString* data_column, const ColumnUInt8* nullmap) {
         StringParser::ParseResult result;
         rapidjson::Value value;
 
         for (int i = 0; i < objects.size(); i++) {
-            TypeImpl::update_value(result, value, data_column->get_data_at(i), allocator);
+            if (nullmap != nullptr && nullmap->get_data()[i]) {
+                JsonParser<'0'>::update_value(result, value, data_column->get_data_at(i),
+                                              allocator);
+            } else {
+                TypeImpl::update_value(result, value, data_column->get_data_at(i), allocator);
+            }
             objects[i].PushBack(value, allocator);
         }
     }
@@ -444,40 +450,96 @@ struct FunctionJsonObjectImpl {
     static void execute_parse(std::string type_flags,
                               const std::vector<const ColumnString*>& data_columns,
                               std::vector<rapidjson::Value>& objects,
-                              rapidjson::Document::AllocatorType& allocator) {
+                              rapidjson::Document::AllocatorType& allocator,
+                              const std::vector<const ColumnUInt8*>& nullmaps) {
         for (auto& array_object : objects) {
             array_object.SetObject();
         }
-
         for (int i = 0; i + 1 < data_columns.size() - 1; i += 2) {
             constexpr_loop_match<'0', '6', JsonParser>(type_flags[i + 1], objects, allocator,
-                                                       data_columns[i], data_columns[i + 1]);
+                                                       data_columns[i], data_columns[i + 1],
+                                                       nullmaps[i + 1]);
         }
     }
 
     template <typename TypeImpl>
     static void execute_type(std::vector<rapidjson::Value>& objects,
                              rapidjson::Document::AllocatorType& allocator,
-                             const ColumnString* key_column, const ColumnString* value_column) {
+                             const ColumnString* key_column, const ColumnString* value_column,
+                             const ColumnUInt8* nullmap) {
         StringParser::ParseResult result;
         rapidjson::Value key;
         rapidjson::Value value;
-
         for (int i = 0; i < objects.size(); i++) {
             JsonParser<'4'>::update_value(result, key, key_column->get_data_at(i),
                                           allocator); // key always is string
-            TypeImpl::update_value(result, value, value_column->get_data_at(i), allocator);
+            if (nullmap != nullptr && nullmap->get_data()[i]) {
+                JsonParser<'0'>::update_value(result, value, value_column->get_data_at(i),
+                                              allocator);
+            } else {
+                TypeImpl::update_value(result, value, value_column->get_data_at(i), allocator);
+            }
             objects[i].AddMember(key, value, allocator);
         }
     }
 };
 
 template <typename SpecificImpl>
-struct FunctionJsonImpl {
+class FunctionJsonAlwaysNotNullable : public IFunction {
+public:
     static constexpr auto name = SpecificImpl::name;
 
+    static FunctionPtr create() {
+        return std::make_shared<FunctionJsonAlwaysNotNullable<SpecificImpl>>();
+    }
+
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 0; }
+
+    bool is_variadic() const override { return true; }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<DataTypeString>();
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto result_column = ColumnString::create();
+
+        std::vector<ColumnPtr> column_ptrs; // prevent converted column destruct
+        std::vector<const ColumnString*> data_columns;
+        std::vector<const ColumnUInt8*> nullmaps;
+        for (int i = 0; i < arguments.size(); i++) {
+            auto column = block.get_by_position(arguments[i]).column;
+            column_ptrs.push_back(column->convert_to_full_column_if_const());
+            const ColumnNullable* col_nullable =
+                    check_and_get_column<ColumnNullable>(column_ptrs.back().get());
+            if (col_nullable) {
+                const ColumnUInt8* col_nullmap = check_and_get_column<ColumnUInt8>(
+                        col_nullable->get_null_map_column_ptr().get());
+                nullmaps.push_back(col_nullmap);
+                const ColumnString* col = check_and_get_column<ColumnString>(
+                        col_nullable->get_nested_column_ptr().get());
+                data_columns.push_back(col);
+            } else {
+                nullmaps.push_back(nullptr);
+                data_columns.push_back(assert_cast<const ColumnString*>(column_ptrs.back().get()));
+            }
+        }
+        execute(data_columns, *assert_cast<ColumnString*>(result_column.get()), input_rows_count,
+                nullmaps);
+        block.get_by_position(result).column = std::move(result_column);
+        return Status::OK();
+    }
+
     static void execute(const std::vector<const ColumnString*>& data_columns,
-                        ColumnString& result_column, size_t input_rows_count) {
+                        ColumnString& result_column, size_t input_rows_count,
+                        const std::vector<const ColumnUInt8*> nullmaps) {
         std::string type_flags = data_columns.back()->get_data_at(0).to_string();
 
         rapidjson::Document document;
@@ -488,7 +550,7 @@ struct FunctionJsonImpl {
             objects.emplace_back(rapidjson::kArrayType);
         }
 
-        SpecificImpl::execute_parse(type_flags, data_columns, objects, allocator);
+        SpecificImpl::execute_parse(type_flags, data_columns, objects, allocator, nullmaps);
 
         rapidjson::StringBuffer buf;
         rapidjson::Writer<rapidjson::StringBuffer> writer(buf);
@@ -572,8 +634,8 @@ void register_function_json(SimpleFunctionFactory& factory) {
     factory.register_function<FunctionGetJsonDouble>();
     factory.register_function<FunctionGetJsonString>();
 
-    factory.register_function<FunctionJson<FunctionJsonImpl<FunctionJsonArrayImpl>>>();
-    factory.register_function<FunctionJson<FunctionJsonImpl<FunctionJsonObjectImpl>>>();
+    factory.register_function<FunctionJsonAlwaysNotNullable<FunctionJsonArrayImpl>>();
+    factory.register_function<FunctionJsonAlwaysNotNullable<FunctionJsonObjectImpl>>();
     factory.register_function<FunctionJson<FunctionJsonQuoteImpl>>();
 }
 
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index b758c00ead..ab0353f826 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1111,10 +1111,10 @@ visible_functions = [
 
     [['json_array'], 'VARCHAR', ['VARCHAR', '...'],
             '_ZN5doris13JsonFunctions10json_arrayEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
-            '', '', 'vec', ''],
+            '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
     [['json_object'], 'VARCHAR', ['VARCHAR', '...'],
             '_ZN5doris13JsonFunctions11json_objectEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
-            '', '', 'vec', ''],
+            '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
     [['json_quote'], 'VARCHAR', ['VARCHAR'],
             '_ZN5doris13JsonFunctions10json_quoteEPN9doris_udf15FunctionContextERKNS1_9StringValE',
             '', '', 'vec', ''],
diff --git a/regression-test/data/query/sql_functions/json_function/test_query_json_array.out b/regression-test/data/query/sql_functions/json_function/test_query_json_array.out
new file mode 100644
index 0000000000..bc6c2d0e7f
--- /dev/null
+++ b/regression-test/data/query/sql_functions/json_function/test_query_json_array.out
@@ -0,0 +1,15 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql1 --
+["k0",1,"k1",null,"k2",null,"k3",null,"k4",null,"k5",null,"k6","k6"]
+["k0",2,"k1",1,"k2",null,"k3",null,"k4",null,"k5",null,"k6","k6"]
+["k0",3,"k1",null,"k2",true,"k3",null,"k4",null,"k5",null,"k6","k6"]
+["k0",4,"k1",null,"k2",null,"k3","test","k4","2022-01-01 11:11:11","k5",null,"k6","k6"]
+["k0",5,"k1",1,"k2",true,"k3","test","k4","2022-01-01 11:11:11","k5",null,"k6","k6"]
+
+-- !sql2 --
+["k0",1,"k1",null,"k2",null,"k3",null,"k4",null,"k5",null,"k6","k6"]
+["k0",2,"k1",1,"k2",null,"k3",null,"k4",null,"k5",null,"k6","k6"]
+["k0",3,"k1",null,"k2",true,"k3",null,"k4",null,"k5",null,"k6","k6"]
+["k0",4,"k1",null,"k2",null,"k3","test","k4","2022-01-01 11:11:11","k5",null,"k6","k6"]
+["k0",5,"k1",1,"k2",true,"k3","test","k4","2022-01-01 11:11:11","k5",null,"k6","k6"]
+
diff --git a/regression-test/data/query/sql_functions/json_function/test_query_json_object.out b/regression-test/data/query/sql_functions/json_function/test_query_json_object.out
index ee1dd4abd8..f270e8484e 100644
--- a/regression-test/data/query/sql_functions/json_function/test_query_json_object.out
+++ b/regression-test/data/query/sql_functions/json_function/test_query_json_object.out
@@ -1,9 +1,15 @@
 -- This file is automatically generated. You should know what you did if you want to edit this
--- !sql --
-{"k1":null}
-{"k1":null}
-{"k1":null}
-{"k1":null}
-{"k1":null}
-{"k1":1}
+-- !sql1 --
+{"k0":1,"k1":null,"k2":null,"k3":null,"k4":null,"k5":null,"k6":"k6"}
+{"k0":2,"k1":1,"k2":null,"k3":null,"k4":null,"k5":null,"k6":"k6"}
+{"k0":3,"k1":null,"k2":true,"k3":null,"k4":null,"k5":null,"k6":"k6"}
+{"k0":4,"k1":null,"k2":null,"k3":"test","k4":"2022-01-01 11:11:11","k5":null,"k6":"k6"}
+{"k0":5,"k1":1,"k2":true,"k3":"test","k4":"2022-01-01 11:11:11","k5":null,"k6":"k6"}
+
+-- !sql2 --
+{"k0":1,"k1":null,"k2":null,"k3":null,"k4":null,"k5":null,"k6":"k6"}
+{"k0":2,"k1":1,"k2":null,"k3":null,"k4":null,"k5":null,"k6":"k6"}
+{"k0":3,"k1":null,"k2":true,"k3":null,"k4":null,"k5":null,"k6":"k6"}
+{"k0":4,"k1":null,"k2":null,"k3":"test","k4":"2022-01-01 11:11:11","k5":null,"k6":"k6"}
+{"k0":5,"k1":1,"k2":true,"k3":"test","k4":"2022-01-01 11:11:11","k5":null,"k6":"k6"}
 
diff --git a/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy b/regression-test/suites/query/sql_functions/json_function/test_query_json_array.groovy
similarity index 53%
copy from regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy
copy to regression-test/suites/query/sql_functions/json_function/test_query_json_array.groovy
index 924bffcc75..6a4ef939a3 100644
--- a/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy
+++ b/regression-test/suites/query/sql_functions/json_function/test_query_json_array.groovy
@@ -15,29 +15,34 @@
 // specific language governing permissions and limitations
 // under the License.
 
-suite("test_query_json_object", "query") {
+suite("test_query_json_array", "query") {
     sql "set enable_vectorized_engine = false;"
-    def tableName = "test_query_json_object"
+    def tableName = "test_query_json_array"
     sql "DROP TABLE IF EXISTS ${tableName}"
     sql """
-            CREATE TABLE `${tableName}` (
-              `k1` int(11) NULL COMMENT "user id"
+            CREATE TABLE ${tableName} (
+              `k0` int(11) not null,
+              `k1` int(11) NULL,
+              `k2` boolean NULL,
+              `k3` varchar(255),
+              `k4` datetime
             ) ENGINE=OLAP
-            DUPLICATE KEY(`k1`)
+            DUPLICATE KEY(`k0`,`k1`,`k2`,`k3`,`k4`)
             COMMENT "OLAP"
-            DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+            DISTRIBUTED BY HASH(`k0`) BUCKETS 1
             PROPERTIES (
             "replication_allocation" = "tag.location.default: 1",
             "in_memory" = "false",
             "storage_format" = "V2"
             );
         """
-    sql "insert into ${tableName} values(null);"
-    sql "insert into ${tableName} values(null);"
-    sql "insert into ${tableName} values(null);"
-    sql "insert into ${tableName} values(null);"
-    sql "insert into ${tableName} values(null);"
-    sql "insert into ${tableName} values(1);"
-    qt_sql "select json_object(\"k1\",k1) from ${tableName};"
+    sql "insert into ${tableName} values(1,null,null,null,null);"
+    sql "insert into ${tableName} values(2,1,null,null,null);"
+    sql "insert into ${tableName} values(3,null,true,null,null);"
+    sql "insert into ${tableName} values(4,null,null,'test','2022-01-01 11:11:11');"
+    sql "insert into ${tableName} values(5,1,true,'test','2022-01-01 11:11:11');"
+    qt_sql1 "select json_array('k0',k0,'k1',k1,'k2',k2,'k3',k3,'k4',k4,'k5', null,'k6','k6') from ${tableName};"
+    sql "set enable_vectorized_engine = true;"
+    qt_sql2 "select json_array('k0',k0,'k1',k1,'k2',k2,'k3',k3,'k4',k4,'k5', null,'k6','k6') from ${tableName};"
     sql "DROP TABLE ${tableName};"
 }
diff --git a/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy b/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy
index 924bffcc75..1f35bdf151 100644
--- a/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy
+++ b/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy
@@ -20,24 +20,29 @@ suite("test_query_json_object", "query") {
     def tableName = "test_query_json_object"
     sql "DROP TABLE IF EXISTS ${tableName}"
     sql """
-            CREATE TABLE `${tableName}` (
-              `k1` int(11) NULL COMMENT "user id"
+            CREATE TABLE ${tableName} (
+              `k0` int(11) not null,
+              `k1` int(11) NULL,
+              `k2` boolean NULL,
+              `k3` varchar(255),
+              `k4` datetime
             ) ENGINE=OLAP
-            DUPLICATE KEY(`k1`)
+            DUPLICATE KEY(`k0`,`k1`,`k2`,`k3`,`k4`)
             COMMENT "OLAP"
-            DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+            DISTRIBUTED BY HASH(`k0`) BUCKETS 1
             PROPERTIES (
             "replication_allocation" = "tag.location.default: 1",
             "in_memory" = "false",
             "storage_format" = "V2"
             );
         """
-    sql "insert into ${tableName} values(null);"
-    sql "insert into ${tableName} values(null);"
-    sql "insert into ${tableName} values(null);"
-    sql "insert into ${tableName} values(null);"
-    sql "insert into ${tableName} values(null);"
-    sql "insert into ${tableName} values(1);"
-    qt_sql "select json_object(\"k1\",k1) from ${tableName};"
+    sql "insert into ${tableName} values(1,null,null,null,null);"
+    sql "insert into ${tableName} values(2,1,null,null,null);"
+    sql "insert into ${tableName} values(3,null,true,null,null);"
+    sql "insert into ${tableName} values(4,null,null,'test','2022-01-01 11:11:11');"
+    sql "insert into ${tableName} values(5,1,true,'test','2022-01-01 11:11:11');"
+    qt_sql1 "select json_object('k0',k0,'k1',k1,'k2',k2,'k3',k3,'k4',k4,'k5', null,'k6','k6') from ${tableName};"
+    sql "set enable_vectorized_engine = true;"
+    qt_sql2 "select json_object('k0',k0,'k1',k1,'k2',k2,'k3',k3,'k4',k4,'k5', null,'k6','k6') from ${tableName};"
     sql "DROP TABLE ${tableName};"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org