You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2023/01/10 00:40:38 UTC

[doris] branch master updated: [refactor](es) remove BE generated dsl for es query #15751

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9e3a61989b [refactor](es) remove BE generated dsl for es query #15751
9e3a61989b is described below

commit 9e3a61989b9196b90d1717e9d96e8e4376a26477
Author: Mingyu Chen <mo...@163.com>
AuthorDate: Tue Jan 10 08:40:32 2023 +0800

    [refactor](es) remove BE generated dsl for es query #15751
    
    Remove the FE config `enable_new_es_dsl` and all related code.
    The DSL for ES queries is now always generated on the FE side.
---
 be/src/exec/es/es_scroll_query.cpp                 | 17 ++---
 be/src/exec/es/es_scroll_query.h                   |  1 -
 be/src/vec/exec/scan/new_es_scan_node.cpp          | 85 +---------------------
 be/src/vec/exec/scan/new_es_scan_node.h            |  7 --
 docs/en/docs/admin-manual/config/fe-config.md      | 10 ---
 .../docs/ecosystem/external-table/doris-on-es.md   |  2 -
 docs/zh-CN/docs/admin-manual/config/fe-config.md   | 10 ---
 .../docs/ecosystem/external-table/doris-on-es.md   |  2 -
 .../main/java/org/apache/doris/common/Config.java  |  6 --
 .../java/org/apache/doris/planner/EsScanNode.java  |  9 +--
 10 files changed, 9 insertions(+), 140 deletions(-)

diff --git a/be/src/exec/es/es_scroll_query.cpp b/be/src/exec/es/es_scroll_query.cpp
index cefa935a6c..a9e3c5e0e0 100644
--- a/be/src/exec/es/es_scroll_query.cpp
+++ b/be/src/exec/es/es_scroll_query.cpp
@@ -59,7 +59,6 @@ std::string ESScrollQueryBuilder::build_clear_scroll_body(const std::string& scr
 
 std::string ESScrollQueryBuilder::build(const std::map<std::string, std::string>& properties,
                                         const std::vector<std::string>& fields,
-                                        std::vector<EsPredicate*>& predicates,
                                         const std::map<std::string, std::string>& docvalue_context,
                                         bool* doc_value_mode) {
     rapidjson::Document es_query_dsl;
@@ -70,19 +69,13 @@ std::string ESScrollQueryBuilder::build(const std::map<std::string, std::string>
     rapidjson::Value query_node(rapidjson::kObjectType);
     // use fe generate dsl, it must be placed outside the if, otherwise it will cause problems in AddMember
     rapidjson::Document fe_query_dsl;
-    if (properties.find(ESScanReader::KEY_QUERY_DSL) != properties.end()) {
-        auto query_dsl = properties.at(ESScanReader::KEY_QUERY_DSL);
-        es_query_dsl.AddMember("query", fe_query_dsl.Parse(query_dsl.c_str(), query_dsl.length()),
-                               allocator);
-    } else {
-        query_node.SetObject();
-        BooleanQueryBuilder::to_query(predicates, &scratch_document, &query_node);
-        // note: add `query` for this value....
-        es_query_dsl.AddMember("query", query_node, allocator);
-    }
-    bool pure_docvalue = true;
+    DCHECK(properties.find(ESScanReader::KEY_QUERY_DSL) != properties.end());
+    auto query_dsl = properties.at(ESScanReader::KEY_QUERY_DSL);
+    es_query_dsl.AddMember("query", fe_query_dsl.Parse(query_dsl.c_str(), query_dsl.length()),
+                           allocator);
 
     // Doris FE already has checked docvalue-scan optimization
+    bool pure_docvalue = true;
     if (properties.find(ESScanReader::KEY_DOC_VALUES_MODE) != properties.end()) {
         pure_docvalue = atoi(properties.at(ESScanReader::KEY_DOC_VALUES_MODE).c_str());
     } else {
diff --git a/be/src/exec/es/es_scroll_query.h b/be/src/exec/es/es_scroll_query.h
index c9df1d960c..3b98c61c0c 100644
--- a/be/src/exec/es/es_scroll_query.h
+++ b/be/src/exec/es/es_scroll_query.h
@@ -36,7 +36,6 @@ public:
     // tie breaker for predicate whether can push down es can reference the push-down filters
     static std::string build(const std::map<std::string, std::string>& properties,
                              const std::vector<std::string>& fields,
-                             std::vector<EsPredicate*>& predicates,
                              const std::map<std::string, std::string>& docvalue_context,
                              bool* doc_value_mode);
 };
diff --git a/be/src/vec/exec/scan/new_es_scan_node.cpp b/be/src/vec/exec/scan/new_es_scan_node.cpp
index 605cece627..fff859f968 100644
--- a/be/src/vec/exec/scan/new_es_scan_node.cpp
+++ b/be/src/vec/exec/scan/new_es_scan_node.cpp
@@ -118,60 +118,7 @@ Status NewEsScanNode::_process_conjuncts() {
         return Status::OK();
     }
 
-    // fe by enable_new_es_dsl to control whether to generate DSL for easy rollback. After the code is stable, can delete the be generation logic
-    if (_properties.find(ESScanReader::KEY_QUERY_DSL) != _properties.end()) {
-        return Status::OK();
-    }
-
-    // if conjunct is constant, compute direct and set eos = true
-    for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) {
-        if (_conjunct_ctxs[conj_idx]->root()->is_constant()) {
-            void* value = _conjunct_ctxs[conj_idx]->get_value(nullptr);
-            if (value == nullptr || *reinterpret_cast<bool*>(value) == false) {
-                _eos = true;
-            }
-        }
-    }
-    RETURN_IF_ERROR(build_conjuncts_list());
-    // remove those predicates which ES cannot support
-    std::vector<bool> list;
-    BooleanQueryBuilder::validate(_predicates, &list);
-
-    DCHECK(list.size() == _predicate_to_conjunct.size());
-    for (int i = list.size() - 1; i >= 0; i--) {
-        if (!list[i]) {
-            _predicate_to_conjunct.erase(_predicate_to_conjunct.begin() + i);
-            _predicates.erase(_predicates.begin() + i);
-        }
-    }
-
-    // filter the conjuncts and ES will process them later
-    for (int i = _predicate_to_conjunct.size() - 1; i >= 0; i--) {
-        int conjunct_index = _predicate_to_conjunct[i];
-        _conjunct_ctxs[conjunct_index]->close(_state);
-        _conjunct_ctxs.erase(_conjunct_ctxs.begin() + conjunct_index);
-    }
-
-    auto checker = [&](int index) {
-        return _conjunct_to_predicate[index] != -1 && list[_conjunct_to_predicate[index]];
-    };
-
-    // _peel_pushed_vconjunct
-    if (_vconjunct_ctx_ptr == nullptr) {
-        return Status::OK();
-    }
-    int leaf_index = 0;
-    vectorized::VExpr* conjunct_expr_root = (*_vconjunct_ctx_ptr)->root();
-    if (conjunct_expr_root != nullptr) {
-        vectorized::VExpr* new_conjunct_expr_root = vectorized::VectorizedUtils::dfs_peel_conjunct(
-                _state, *_vconjunct_ctx_ptr, conjunct_expr_root, leaf_index, checker);
-        if (new_conjunct_expr_root == nullptr) {
-            (*_vconjunct_ctx_ptr)->close(_state);
-            _vconjunct_ctx_ptr.reset(nullptr);
-        } else {
-            (*_vconjunct_ctx_ptr)->set_root(new_conjunct_expr_root);
-        }
-    }
+    CHECK(_properties.find(ESScanReader::KEY_QUERY_DSL) != _properties.end());
     return Status::OK();
 }
 
@@ -199,7 +146,7 @@ Status NewEsScanNode::_init_scanners(std::list<VScanner*>* scanners) {
 
         bool doc_value_mode = false;
         properties[ESScanReader::KEY_QUERY] = ESScrollQueryBuilder::build(
-                properties, _column_names, _predicates, _docvalue_context, &doc_value_mode);
+                properties, _column_names, _docvalue_context, &doc_value_mode);
 
         NewEsScanner* scanner = new NewEsScanner(_state, this, _limit_per_scanner, _tuple_id,
                                                  properties, _docvalue_context, doc_value_mode);
@@ -211,32 +158,4 @@ Status NewEsScanNode::_init_scanners(std::list<VScanner*>* scanners) {
     return Status::OK();
 }
 
-// build predicate
-Status NewEsScanNode::build_conjuncts_list() {
-    Status status = Status::OK();
-    _conjunct_to_predicate.resize(_conjunct_ctxs.size());
-
-    for (int i = 0; i < _conjunct_ctxs.size(); ++i) {
-        EsPredicate* predicate = _pool->add(new EsPredicate(_conjunct_ctxs[i], _tuple_desc, _pool));
-        predicate->set_field_context(_fields_context);
-        status = predicate->build_disjuncts_list();
-        if (status.ok()) {
-            _conjunct_to_predicate[i] = _predicate_to_conjunct.size();
-            _predicate_to_conjunct.push_back(i);
-
-            _predicates.push_back(predicate);
-        } else {
-            _conjunct_to_predicate[i] = -1;
-
-            VLOG_CRITICAL << status;
-            status = predicate->get_es_query_status();
-            if (!status.ok()) {
-                LOG(WARNING) << status;
-                return status;
-            }
-        }
-    }
-
-    return Status::OK();
-}
 } // namespace doris::vectorized
diff --git a/be/src/vec/exec/scan/new_es_scan_node.h b/be/src/vec/exec/scan/new_es_scan_node.h
index 55aab31dcc..e57649a91e 100644
--- a/be/src/vec/exec/scan/new_es_scan_node.h
+++ b/be/src/vec/exec/scan/new_es_scan_node.h
@@ -41,9 +41,6 @@ protected:
     Status _process_conjuncts() override;
     Status _init_scanners(std::list<VScanner*>* scanners) override;
 
-private:
-    Status build_conjuncts_list();
-
 private:
     TupleId _tuple_id;
     TupleDescriptor* _tuple_desc;
@@ -55,10 +52,6 @@ private:
     std::vector<std::unique_ptr<TEsScanRange>> _scan_ranges;
     std::vector<std::string> _column_names;
 
-    std::vector<EsPredicate*> _predicates;
-    std::vector<int> _predicate_to_conjunct;
-    std::vector<int> _conjunct_to_predicate;
-
     // Profile
     std::unique_ptr<RuntimeProfile> _es_profile;
     RuntimeProfile::Counter* _rows_read_counter;
diff --git a/docs/en/docs/admin-manual/config/fe-config.md b/docs/en/docs/admin-manual/config/fe-config.md
index 75bea80386..9b9c9794ec 100644
--- a/docs/en/docs/admin-manual/config/fe-config.md
+++ b/docs/en/docs/admin-manual/config/fe-config.md
@@ -2363,16 +2363,6 @@ Default:10
 
 fe will call es api to get es index shard info every es_state_sync_interval_secs
 
-#### `enable_new_es_dsl`
-
-Default:true
-
-Is it possible to dynamically configure: true
-
-Is it a configuration item unique to the Master FE node: false
-
-Use new fe generate es dsl.
-
 ### External Resources
 
 #### `dpp_hadoop_client_path`
diff --git a/docs/en/docs/ecosystem/external-table/doris-on-es.md b/docs/en/docs/ecosystem/external-table/doris-on-es.md
index 77dc7beb92..44cff17f11 100644
--- a/docs/en/docs/ecosystem/external-table/doris-on-es.md
+++ b/docs/en/docs/ecosystem/external-table/doris-on-es.md
@@ -155,8 +155,6 @@ Parameter | Description
 
 An important ability of `Doris On ES` is the push-down of filter conditions: The filtering conditions are pushed to ES, so that only the data that really meets the conditions will be returned, which can significantly improve query performance and reduce CPU, memory, and IO utilization of Doris and ES
 
-`enable_new_es_dsl`Represents whether to use the new dsl generation logic, subsequent bug fixes and iterations are development in the new dsl, default to `true`, can be changed in `fe.conf`
-
 The following operators (Operators) will be optimized to the following ES Query:
 
 | SQL syntax  | ES 5.x+ syntax | 
diff --git a/docs/zh-CN/docs/admin-manual/config/fe-config.md b/docs/zh-CN/docs/admin-manual/config/fe-config.md
index 8dbb37b621..f5ac3ca8e4 100644
--- a/docs/zh-CN/docs/admin-manual/config/fe-config.md
+++ b/docs/zh-CN/docs/admin-manual/config/fe-config.md
@@ -2363,16 +2363,6 @@ hive metastore 的默认超时时间
 
 FE 会在每隔 es_state_sync_interval_secs 调用 es api 获取 es 索引分片信息
 
-#### `enable_new_es_dsl`
-
-默认值:true
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:false
-
-使用新的 fe 生成的 es dsl
-
 ### 外部资源
 
 #### `dpp_hadoop_client_path`
diff --git a/docs/zh-CN/docs/ecosystem/external-table/doris-on-es.md b/docs/zh-CN/docs/ecosystem/external-table/doris-on-es.md
index 99cdffcac4..c170f185b5 100644
--- a/docs/zh-CN/docs/ecosystem/external-table/doris-on-es.md
+++ b/docs/zh-CN/docs/ecosystem/external-table/doris-on-es.md
@@ -152,8 +152,6 @@ PROPERTIES (
 ##### 过滤条件下推
 `Doris On ES`一个重要的功能就是过滤条件的下推: 过滤条件下推给ES,这样只有真正满足条件的数据才会被返回,能够显著的提高查询性能和降低Doris和Elasticsearch的CPU、memory、IO使用量
 
-`enable_new_es_dsl`代表是否使用新版dsl生成逻辑, 后续 bug 修复和迭代都在新版dsl开发, 默认为`true`, 可在`fe.conf`中进行修改
-
 下面的操作符(Operators)会被优化成如下ES Query:
 
 | SQL syntax  | ES 5.x+ syntax | 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
index 93fbfc6e18..71cb5fd188 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
@@ -1732,12 +1732,6 @@ public class Config extends ConfigBase {
     @ConfField(mutable = true, masterOnly = true)
     public static boolean enable_array_type = false;
 
-    /**
-     * Use new fe generate es dsl.
-     */
-    @ConfField(mutable = true)
-    public static boolean enable_new_es_dsl = true;
-
     /**
      * The timeout of executing async remote fragment.
      * In normal case, the async remote fragment will be executed in a short time. If system are under high load
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java
index a57ee755f3..5a6667eb71 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java
@@ -29,7 +29,6 @@ import org.apache.doris.catalog.PartitionItem;
 import org.apache.doris.catalog.RangePartitionInfo;
 import org.apache.doris.catalog.external.EsExternalTable;
 import org.apache.doris.common.AnalysisException;
-import org.apache.doris.common.Config;
 import org.apache.doris.common.UserException;
 import org.apache.doris.external.elasticsearch.EsShardPartitions;
 import org.apache.doris.external.elasticsearch.EsShardRouting;
@@ -180,9 +179,7 @@ public class EsScanNode extends ScanNode {
             esScanNode.setDocvalueContext(table.docValueContext());
             properties.put(EsResource.DOC_VALUES_MODE, String.valueOf(useDocValueScan(desc, table.docValueContext())));
         }
-        if (Config.enable_new_es_dsl) {
-            properties.put(EsResource.QUERY_DSL, queryBuilder.toJson());
-        }
+        properties.put(EsResource.QUERY_DSL, queryBuilder.toJson());
         if (table.isEnableKeywordSniff() && table.fieldsContext().size() > 0) {
             esScanNode.setFieldsContext(table.fieldsContext());
         }
@@ -376,9 +373,7 @@ public class EsScanNode extends ScanNode {
             } else {
                 queryBuilder = boolQueryBuilder;
             }
-            if (Config.enable_new_es_dsl) {
-                conjuncts.removeIf(expr -> !notPushDownList.contains(expr));
-            }
+            conjuncts.removeIf(expr -> !notPushDownList.contains(expr));
         }
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org