You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by li...@apache.org on 2023/06/16 07:10:19 UTC
[doris] branch master updated: [Feature] (json)add json_contains function (#20824)
This is an automated email from the ASF dual-hosted git repository.
lide pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 97135a1cbb [Feature] (json)add json_contains function (#20824)
97135a1cbb is described below
commit 97135a1cbb2d3483479964515e10a0fde8fc551c
Author: yuxuan-luo <11...@users.noreply.github.com>
AuthorDate: Fri Jun 16 15:10:12 2023 +0800
[Feature] (json)add json_contains function (#20824)
---
be/src/vec/functions/function_json.cpp | 127 +++++++++++++++++++++
.../sql-functions/json-functions/json_contains.md | 69 +++++++++++
.../sql-functions/json-functions/json_contains.md | 69 +++++++++++
gensrc/script/doris_builtins_functions.py | 1 +
.../json_functions/test_json_function.out | 21 ++++
.../json_functions/test_json_function.groovy | 9 ++
6 files changed, 296 insertions(+)
diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp
index e4f955ae0a..4d2f0f3614 100644
--- a/be/src/vec/functions/function_json.cpp
+++ b/be/src/vec/functions/function_json.cpp
@@ -18,6 +18,7 @@
#include <rapidjson/allocators.h>
#include <rapidjson/document.h>
#include <rapidjson/encodings.h>
+#include <rapidjson/pointer.h>
#include <rapidjson/rapidjson.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
@@ -935,6 +936,131 @@ public:
}
};
+class FunctionJsonContains : public IFunction {
+public:
+ static constexpr auto name = "json_contains";
+ static FunctionPtr create() { return std::make_shared<FunctionJsonContains>(); }
+
+ String get_name() const override { return name; }
+
+ size_t get_number_of_arguments() const override { return 3; }
+
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+ return make_nullable(std::make_shared<DataTypeInt32>());
+ }
+
+ bool use_default_implementation_for_nulls() const override { return false; }
+
+ bool json_contains_object(const rapidjson::Value& target,
+ const rapidjson::Value& search_value) {
+ if (!target.IsObject() || !search_value.IsObject()) {
+ return false;
+ }
+
+ for (auto itr = search_value.MemberBegin(); itr != search_value.MemberEnd(); ++itr) {
+ if (!target.HasMember(itr->name) || !json_contains(target[itr->name], itr->value)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ bool json_contains_array(const rapidjson::Value& target, const rapidjson::Value& search_value) {
+ if (!target.IsArray() || !search_value.IsArray()) {
+ return false;
+ }
+
+ for (auto itr = search_value.Begin(); itr != search_value.End(); ++itr) {
+ bool found = false;
+ for (auto target_itr = target.Begin(); target_itr != target.End(); ++target_itr) {
+ if (json_contains(*target_itr, *itr)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ bool json_contains(const rapidjson::Value& target, const rapidjson::Value& search_value) {
+ if (target == search_value) {
+ return true;
+ }
+
+ if (target.IsObject() && search_value.IsObject()) {
+ return json_contains_object(target, search_value);
+ }
+
+ if (target.IsArray() && search_value.IsArray()) {
+ return json_contains_array(target, search_value);
+ }
+
+ return false;
+ }
+
+ Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+ size_t result, size_t input_rows_count) override {
+ const IColumn& col_json = *(block.get_by_position(arguments[0]).column);
+ const IColumn& col_search = *(block.get_by_position(arguments[1]).column);
+ const IColumn& col_path = *(block.get_by_position(arguments[2]).column);
+
+ auto null_map = ColumnUInt8::create(input_rows_count, 0);
+
+ const ColumnString* col_json_string = check_and_get_column<ColumnString>(col_json);
+ const ColumnString* col_search_string = check_and_get_column<ColumnString>(col_search);
+ const ColumnString* col_path_string = check_and_get_column<ColumnString>(col_path);
+
+ if (!col_json_string || !col_search_string || !col_path_string) {
+ return Status::RuntimeError("Illegal column should be ColumnString");
+ }
+
+ auto col_to = ColumnVector<vectorized::Int32>::create();
+ auto& vec_to = col_to->get_data();
+ size_t size = col_json.size();
+ vec_to.resize(size);
+
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ if (col_json.is_null_at(i) || col_search.is_null_at(i) || col_path.is_null_at(i)) {
+ null_map->get_data()[i] = 1;
+ vec_to[i] = 0;
+ continue;
+ }
+
+ const auto& json_val = col_json_string->get_data_at(i);
+ const auto& search_val = col_search_string->get_data_at(i);
+ const auto& path_val = col_path_string->get_data_at(i);
+
+ std::string_view json_string(json_val.data, json_val.size);
+ std::string_view search_string(search_val.data, search_val.size);
+ std::string_view path_string(path_val.data, path_val.size);
+
+ rapidjson::Document document;
+ auto target_val = get_json_object<JSON_FUN_STRING>(json_string, path_string, &document);
+ if (target_val == nullptr || target_val->IsNull()) {
+ vec_to[i] = 0;
+ } else {
+ rapidjson::Document search_doc;
+ search_doc.Parse(search_string.data(), search_string.size());
+ if (json_contains(*target_val, search_doc)) {
+ vec_to[i] = 1;
+ } else {
+ vec_to[i] = 0;
+ }
+ }
+ }
+
+ block.replace_by_position(result,
+ ColumnNullable::create(std::move(col_to), std::move(null_map)));
+
+ return Status::OK();
+ }
+};
+
class FunctionJsonUnquote : public IFunction {
public:
static constexpr auto name = "json_unquote";
@@ -1015,6 +1141,7 @@ void register_function_json(SimpleFunctionFactory& factory) {
factory.register_function<FunctionJson<FunctionJsonExtractImpl>>();
factory.register_function<FunctionJsonValid>();
+ factory.register_function<FunctionJsonContains>();
}
} // namespace doris::vectorized
diff --git a/docs/en/docs/sql-manual/sql-functions/json-functions/json_contains.md b/docs/en/docs/sql-manual/sql-functions/json-functions/json_contains.md
new file mode 100644
index 0000000000..6f8a8f3da5
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/json-functions/json_contains.md
@@ -0,0 +1,69 @@
+---
+{
+"title": "json_contains",
+"language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## json_contains
+### description
+#### Syntax
+
+`INT json_contains(VARCHAR json_str, VARCHAR candidate, VARCHAR json_path)`
+
+
+Indicates by returning 1 or 0 whether a given candidate JSON document is contained at a specific path within the json_str JSON document
+
+### example
+
+```
+mysql> SET @j = '{"a": 1, "b": 2, "c": {"d": 4}}';
+mysql> SET @j2 = '1';
+mysql> SELECT JSON_CONTAINS(@j, @j2, '$.a');
++-------------------------------+
+| JSON_CONTAINS(@j, @j2, '$.a') |
++-------------------------------+
+| 1 |
++-------------------------------+
+mysql> SELECT JSON_CONTAINS(@j, @j2, '$.b');
++-------------------------------+
+| JSON_CONTAINS(@j, @j2, '$.b') |
++-------------------------------+
+| 0 |
++-------------------------------+
+
+mysql> SET @j2 = '{"d": 4}';
+mysql> SELECT JSON_CONTAINS(@j, @j2, '$.a');
++-------------------------------+
+| JSON_CONTAINS(@j, @j2, '$.a') |
++-------------------------------+
+| 0 |
++-------------------------------+
+mysql> SELECT JSON_CONTAINS(@j, @j2, '$.c');
++-------------------------------+
+| JSON_CONTAINS(@j, @j2, '$.c') |
++-------------------------------+
+| 1 |
++-------------------------------+
+```
+### keywords
+json,json_contains
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/json-functions/json_contains.md b/docs/zh-CN/docs/sql-manual/sql-functions/json-functions/json_contains.md
new file mode 100644
index 0000000000..fa29486648
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/json-functions/json_contains.md
@@ -0,0 +1,69 @@
+---
+{
+"title": "json_contains",
+"language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## json_contains
+### description
+#### Syntax
+
+`INT json_contains(VARCHAR json_str, VARCHAR candidate, VARCHAR json_path)`
+
+
+通过返回 1 或 0 来指示给定的 candidate JSON 文档是否包含在 json_str JSON json_path 路径下的文档中
+
+### example
+
+```
+mysql> SET @j = '{"a": 1, "b": 2, "c": {"d": 4}}';
+mysql> SET @j2 = '1';
+mysql> SELECT JSON_CONTAINS(@j, @j2, '$.a');
++-------------------------------+
+| JSON_CONTAINS(@j, @j2, '$.a') |
++-------------------------------+
+| 1 |
++-------------------------------+
+mysql> SELECT JSON_CONTAINS(@j, @j2, '$.b');
++-------------------------------+
+| JSON_CONTAINS(@j, @j2, '$.b') |
++-------------------------------+
+| 0 |
++-------------------------------+
+
+mysql> SET @j2 = '{"d": 4}';
+mysql> SELECT JSON_CONTAINS(@j, @j2, '$.a');
++-------------------------------+
+| JSON_CONTAINS(@j, @j2, '$.a') |
++-------------------------------+
+| 0 |
++-------------------------------+
+mysql> SELECT JSON_CONTAINS(@j, @j2, '$.c');
++-------------------------------+
+| JSON_CONTAINS(@j, @j2, '$.c') |
++-------------------------------+
+| 1 |
++-------------------------------+
+```
+### keywords
+json,json_contains
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 6b9b9458ae..c7e36eb76e 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1753,6 +1753,7 @@ visible_functions = {
[['json_object'], 'VARCHAR', ['VARCHAR', '...'], 'ALWAYS_NOT_NULLABLE'],
[['json_quote'], 'VARCHAR', ['VARCHAR'], ''],
[['json_valid'], 'INT', ['VARCHAR'], 'ALWAYS_NULLABLE'],
+ [['json_contains'], 'INT', ['VARCHAR', 'VARCHAR', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['json_unquote'], 'VARCHAR', ['VARCHAR'], 'ALWAYS_NULLABLE'],
[['json_extract'], 'VARCHAR', ['VARCHAR', 'VARCHAR', '...'], '']
],
diff --git a/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out b/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out
index 3fae061dcc..e776281b5d 100644
--- a/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out
+++ b/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out
@@ -146,3 +146,24 @@ doris
-- !sql --
123
+-- !sql --
+1
+
+-- !sql --
+0
+
+-- !sql --
+0
+
+-- !sql --
+1
+
+-- !sql --
+0
+
+-- !sql --
+1
+
+-- !sql --
+1
+
diff --git a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy
index c0a9fe6b8b..2aa4056198 100644
--- a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy
+++ b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy
@@ -74,4 +74,13 @@ suite("test_json_function") {
qt_sql "SELECT '{\"k1\": \"v1\", \"k2\": { \"k21\": 6.6, \"k22\": [1, 2, 3] } }'->'\$.k2'->'\$.k22'"
qt_sql "SELECT json_unquote('{\"id\": 123, \"name\": \"doris\"}'->'\$.name');"
qt_sql "SELECT json_extract('{\"id\": 123, \"name\": \"doris\"}', '\$.id', '\$.name')->'\$.[0]';"
+
+ qt_sql "SELECT JSON_CONTAINS('{\"a\": 1, \"b\": 2, \"c\": {\"d\": 4}}','1','\$.a');"
+ qt_sql "SELECT JSON_CONTAINS('{\"a\": 1, \"b\": 2, \"c\": {\"d\": 4}}','1','\$.b');"
+ qt_sql "SELECT JSON_CONTAINS('{\"a\": 1, \"b\": 2, \"c\": {\"d\": 4}}','{\"d\": 4}','\$.a');"
+ qt_sql "SELECT JSON_CONTAINS('{\"a\": 1, \"b\": 2, \"c\": {\"d\": 4}}','{\"d\": 4}','\$.c');"
+ qt_sql "SELECT JSON_CONTAINS('{\"name\": \"John\", \"age\": 30, \"city\": \"New York\", \"hobbies\": [\"reading\", \"travelling\"]}', '{\"age\": 31, \"hobbies\": [\"reading\"]}', '\$.');"
+ qt_sql "SELECT JSON_CONTAINS('{\"name\": \"John\", \"age\": 30, \"projects\": [{\"name\": \"Project A\", \"year\": 2020}, {\"name\": \"Project B\", \"year\": 2021}]}', '{\"projects\": [{\"name\": \"Project A\"}]}', '\$.');"
+ qt_sql "SELECT JSON_CONTAINS('{\"name\": \"John\", \"age\": 30, \"address\": {\"city\": \"New York\", \"country\": \"USA\"}}', '{\"address\": {\"city\": \"New York\"}}', '\$.');"
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org