You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by li...@apache.org on 2023/06/16 07:10:19 UTC

[doris] branch master updated: [Feature] (json)add json_contains function (#20824)

This is an automated email from the ASF dual-hosted git repository.

lide pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 97135a1cbb [Feature] (json)add json_contains function (#20824)
97135a1cbb is described below

commit 97135a1cbb2d3483479964515e10a0fde8fc551c
Author: yuxuan-luo <11...@users.noreply.github.com>
AuthorDate: Fri Jun 16 15:10:12 2023 +0800

    [Feature] (json)add json_contains function (#20824)
---
 be/src/vec/functions/function_json.cpp             | 127 +++++++++++++++++++++
 .../sql-functions/json-functions/json_contains.md  |  69 +++++++++++
 .../sql-functions/json-functions/json_contains.md  |  69 +++++++++++
 gensrc/script/doris_builtins_functions.py          |   1 +
 .../json_functions/test_json_function.out          |  21 ++++
 .../json_functions/test_json_function.groovy       |   9 ++
 6 files changed, 296 insertions(+)

diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp
index e4f955ae0a..4d2f0f3614 100644
--- a/be/src/vec/functions/function_json.cpp
+++ b/be/src/vec/functions/function_json.cpp
@@ -18,6 +18,7 @@
 #include <rapidjson/allocators.h>
 #include <rapidjson/document.h>
 #include <rapidjson/encodings.h>
+#include <rapidjson/pointer.h>
 #include <rapidjson/rapidjson.h>
 #include <rapidjson/stringbuffer.h>
 #include <rapidjson/writer.h>
@@ -935,6 +936,131 @@ public:
     }
 };
 
+class FunctionJsonContains : public IFunction {
+public:
+    static constexpr auto name = "json_contains";
+    static FunctionPtr create() { return std::make_shared<FunctionJsonContains>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 3; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return make_nullable(std::make_shared<DataTypeInt32>());
+    }
+
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    bool json_contains_object(const rapidjson::Value& target,
+                              const rapidjson::Value& search_value) {
+        if (!target.IsObject() || !search_value.IsObject()) {
+            return false;
+        }
+
+        for (auto itr = search_value.MemberBegin(); itr != search_value.MemberEnd(); ++itr) {
+            if (!target.HasMember(itr->name) || !json_contains(target[itr->name], itr->value)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    bool json_contains_array(const rapidjson::Value& target, const rapidjson::Value& search_value) {
+        if (!target.IsArray() || !search_value.IsArray()) {
+            return false;
+        }
+
+        for (auto itr = search_value.Begin(); itr != search_value.End(); ++itr) {
+            bool found = false;
+            for (auto target_itr = target.Begin(); target_itr != target.End(); ++target_itr) {
+                if (json_contains(*target_itr, *itr)) {
+                    found = true;
+                    break;
+                }
+            }
+            if (!found) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    bool json_contains(const rapidjson::Value& target, const rapidjson::Value& search_value) {
+        if (target == search_value) {
+            return true;
+        }
+
+        if (target.IsObject() && search_value.IsObject()) {
+            return json_contains_object(target, search_value);
+        }
+
+        if (target.IsArray() && search_value.IsArray()) {
+            return json_contains_array(target, search_value);
+        }
+
+        return false;
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        const IColumn& col_json = *(block.get_by_position(arguments[0]).column);
+        const IColumn& col_search = *(block.get_by_position(arguments[1]).column);
+        const IColumn& col_path = *(block.get_by_position(arguments[2]).column);
+
+        auto null_map = ColumnUInt8::create(input_rows_count, 0);
+
+        const ColumnString* col_json_string = check_and_get_column<ColumnString>(col_json);
+        const ColumnString* col_search_string = check_and_get_column<ColumnString>(col_search);
+        const ColumnString* col_path_string = check_and_get_column<ColumnString>(col_path);
+
+        if (!col_json_string || !col_search_string || !col_path_string) {
+            return Status::RuntimeError("Illegal column should be ColumnString");
+        }
+
+        auto col_to = ColumnVector<vectorized::Int32>::create();
+        auto& vec_to = col_to->get_data();
+        size_t size = col_json.size();
+        vec_to.resize(size);
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (col_json.is_null_at(i) || col_search.is_null_at(i) || col_path.is_null_at(i)) {
+                null_map->get_data()[i] = 1;
+                vec_to[i] = 0;
+                continue;
+            }
+
+            const auto& json_val = col_json_string->get_data_at(i);
+            const auto& search_val = col_search_string->get_data_at(i);
+            const auto& path_val = col_path_string->get_data_at(i);
+
+            std::string_view json_string(json_val.data, json_val.size);
+            std::string_view search_string(search_val.data, search_val.size);
+            std::string_view path_string(path_val.data, path_val.size);
+
+            rapidjson::Document document;
+            auto target_val = get_json_object<JSON_FUN_STRING>(json_string, path_string, &document);
+            if (target_val == nullptr || target_val->IsNull()) {
+                vec_to[i] = 0;
+            } else {
+                rapidjson::Document search_doc;
+                search_doc.Parse(search_string.data(), search_string.size());
+                if (json_contains(*target_val, search_doc)) {
+                    vec_to[i] = 1;
+                } else {
+                    vec_to[i] = 0;
+                }
+            }
+        }
+
+        block.replace_by_position(result,
+                                  ColumnNullable::create(std::move(col_to), std::move(null_map)));
+
+        return Status::OK();
+    }
+};
+
 class FunctionJsonUnquote : public IFunction {
 public:
     static constexpr auto name = "json_unquote";
@@ -1015,6 +1141,7 @@ void register_function_json(SimpleFunctionFactory& factory) {
     factory.register_function<FunctionJson<FunctionJsonExtractImpl>>();
 
     factory.register_function<FunctionJsonValid>();
+    factory.register_function<FunctionJsonContains>();
 }
 
 } // namespace doris::vectorized
diff --git a/docs/en/docs/sql-manual/sql-functions/json-functions/json_contains.md b/docs/en/docs/sql-manual/sql-functions/json-functions/json_contains.md
new file mode 100644
index 0000000000..6f8a8f3da5
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/json-functions/json_contains.md
@@ -0,0 +1,69 @@
+---
+{
+"title": "json_contains",
+"language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## json_contains
+### description
+#### Syntax
+
+`INT json_contains(VARCHAR json_str, VARCHAR candidate, VARCHAR json_path)`
+
+
+Indicates by returning 1 or 0 whether a given candidate JSON document is contained at a specific path within the json_str JSON document
+
+### example
+
+```
+mysql> SET @j = '{"a": 1, "b": 2, "c": {"d": 4}}';
+mysql> SET @j2 = '1';
+mysql> SELECT JSON_CONTAINS(@j, @j2, '$.a');
++-------------------------------+
+| JSON_CONTAINS(@j, @j2, '$.a') |
++-------------------------------+
+|                             1 |
++-------------------------------+
+mysql> SELECT JSON_CONTAINS(@j, @j2, '$.b');
++-------------------------------+
+| JSON_CONTAINS(@j, @j2, '$.b') |
++-------------------------------+
+|                             0 |
++-------------------------------+
+
+mysql> SET @j2 = '{"d": 4}';
+mysql> SELECT JSON_CONTAINS(@j, @j2, '$.a');
++-------------------------------+
+| JSON_CONTAINS(@j, @j2, '$.a') |
++-------------------------------+
+|                             0 |
++-------------------------------+
+mysql> SELECT JSON_CONTAINS(@j, @j2, '$.c');
++-------------------------------+
+| JSON_CONTAINS(@j, @j2, '$.c') |
++-------------------------------+
+|                             1 |
++-------------------------------+
+```
+### keywords
+json,json_contains
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/json-functions/json_contains.md b/docs/zh-CN/docs/sql-manual/sql-functions/json-functions/json_contains.md
new file mode 100644
index 0000000000..fa29486648
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/json-functions/json_contains.md
@@ -0,0 +1,69 @@
+---
+{
+"title": "json_contains",
+"language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## json_contains
+### description
+#### Syntax
+
+`INT json_contains(VARCHAR json_str, VARCHAR candidate, VARCHAR json_path)`
+
+
+通过返回 1 或 0 来指示给定的 candidate JSON 文档是否包含在 json_str JSON json_path 路径下的文档中
+
+### example
+
+```
+mysql> SET @j = '{"a": 1, "b": 2, "c": {"d": 4}}';
+mysql> SET @j2 = '1';
+mysql> SELECT JSON_CONTAINS(@j, @j2, '$.a');
++-------------------------------+
+| JSON_CONTAINS(@j, @j2, '$.a') |
++-------------------------------+
+|                             1 |
++-------------------------------+
+mysql> SELECT JSON_CONTAINS(@j, @j2, '$.b');
++-------------------------------+
+| JSON_CONTAINS(@j, @j2, '$.b') |
++-------------------------------+
+|                             0 |
++-------------------------------+
+
+mysql> SET @j2 = '{"d": 4}';
+mysql> SELECT JSON_CONTAINS(@j, @j2, '$.a');
++-------------------------------+
+| JSON_CONTAINS(@j, @j2, '$.a') |
++-------------------------------+
+|                             0 |
++-------------------------------+
+mysql> SELECT JSON_CONTAINS(@j, @j2, '$.c');
++-------------------------------+
+| JSON_CONTAINS(@j, @j2, '$.c') |
++-------------------------------+
+|                             1 |
++-------------------------------+
+```
+### keywords
+json,json_contains
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 6b9b9458ae..c7e36eb76e 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1753,6 +1753,7 @@ visible_functions = {
         [['json_object'], 'VARCHAR', ['VARCHAR', '...'], 'ALWAYS_NOT_NULLABLE'],
         [['json_quote'], 'VARCHAR', ['VARCHAR'], ''],
         [['json_valid'], 'INT', ['VARCHAR'], 'ALWAYS_NULLABLE'],
+        [['json_contains'], 'INT', ['VARCHAR', 'VARCHAR', 'VARCHAR'], 'ALWAYS_NULLABLE'],
         [['json_unquote'], 'VARCHAR', ['VARCHAR'], 'ALWAYS_NULLABLE'],
         [['json_extract'], 'VARCHAR', ['VARCHAR', 'VARCHAR', '...'], '']
     ],
diff --git a/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out b/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out
index 3fae061dcc..e776281b5d 100644
--- a/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out
+++ b/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out
@@ -146,3 +146,24 @@ doris
 -- !sql --
 123
 
+-- !sql --
+1
+
+-- !sql --
+0
+
+-- !sql --
+0
+
+-- !sql --
+1
+
+-- !sql --
+0
+
+-- !sql --
+1
+
+-- !sql --
+1
+
diff --git a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy
index c0a9fe6b8b..2aa4056198 100644
--- a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy
+++ b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy
@@ -74,4 +74,13 @@ suite("test_json_function") {
     qt_sql "SELECT '{\"k1\": \"v1\", \"k2\": { \"k21\": 6.6, \"k22\": [1, 2, 3] } }'->'\$.k2'->'\$.k22'"
     qt_sql "SELECT json_unquote('{\"id\": 123, \"name\": \"doris\"}'->'\$.name');"
     qt_sql "SELECT json_extract('{\"id\": 123, \"name\": \"doris\"}', '\$.id', '\$.name')->'\$.[0]';"
+
+    qt_sql "SELECT JSON_CONTAINS('{\"a\": 1, \"b\": 2, \"c\": {\"d\": 4}}','1','\$.a');"
+    qt_sql "SELECT JSON_CONTAINS('{\"a\": 1, \"b\": 2, \"c\": {\"d\": 4}}','1','\$.b');"
+    qt_sql "SELECT JSON_CONTAINS('{\"a\": 1, \"b\": 2, \"c\": {\"d\": 4}}','{\"d\": 4}','\$.a');"
+    qt_sql "SELECT JSON_CONTAINS('{\"a\": 1, \"b\": 2, \"c\": {\"d\": 4}}','{\"d\": 4}','\$.c');"
+    qt_sql "SELECT JSON_CONTAINS('{\"name\": \"John\", \"age\": 30, \"city\": \"New York\", \"hobbies\": [\"reading\", \"travelling\"]}', '{\"age\": 31, \"hobbies\": [\"reading\"]}', '\$.');"
+    qt_sql "SELECT JSON_CONTAINS('{\"name\": \"John\", \"age\": 30, \"projects\": [{\"name\": \"Project A\", \"year\": 2020}, {\"name\": \"Project B\", \"year\": 2021}]}', '{\"projects\": [{\"name\": \"Project A\"}]}', '\$.');"
+    qt_sql "SELECT JSON_CONTAINS('{\"name\": \"John\", \"age\": 30, \"address\": {\"city\": \"New York\", \"country\": \"USA\"}}', '{\"address\": {\"city\": \"New York\"}}', '\$.');"
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org