You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by kx...@apache.org on 2023/06/07 16:13:03 UTC

[doris] 09/13: [Fix](inverted index) if range query exceeds CLucene limits, downgrade it from inverted index (#20528)

This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0-beta
in repository https://gitbox.apache.org/repos/asf/doris.git

commit c6a2bc900d66350664ca3bf377d65ecfd3e5160e
Author: airborne12 <ai...@gmail.com>
AuthorDate: Wed Jun 7 20:07:48 2023 +0800

    [Fix](inverted index) if range query exceeds CLucene limits, downgrade it from inverted index (#20528)
    
    CLucene may throw CL_ERR_TooManyClauses when a range query matches too many terms.
    In that case, the query has to be downgraded to run without the inverted index.
---
 be/src/common/status.h                               |  4 ++--
 .../olap/rowset/segment_v2/inverted_index_reader.cpp | 20 +++++++++++++++++---
 .../olap/rowset/segment_v2/inverted_index_reader.h   |  1 +
 be/src/olap/rowset/segment_v2/segment_iterator.cpp   |  4 ++--
 4 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/be/src/common/status.h b/be/src/common/status.h
index ba634df13c..146b4f34f7 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -262,7 +262,7 @@ E(INVERTED_INDEX_INVALID_PARAMETERS, -6000);
 E(INVERTED_INDEX_NOT_SUPPORTED, -6001);
 E(INVERTED_INDEX_CLUCENE_ERROR, -6002);
 E(INVERTED_INDEX_FILE_NOT_FOUND, -6003);
-E(INVERTED_INDEX_FILE_HIT_LIMIT, -6004);
+E(INVERTED_INDEX_BYPASS, -6004);
 E(INVERTED_INDEX_NO_TERMS, -6005);
 E(INVERTED_INDEX_RENAME_FILE_FAILED, -6006);
 E(INVERTED_INDEX_EVALUATE_SKIPPED, -6007);
@@ -293,7 +293,7 @@ constexpr bool capture_stacktrace() {
         && code != ErrorCode::INVERTED_INDEX_NOT_SUPPORTED
         && code != ErrorCode::INVERTED_INDEX_CLUCENE_ERROR
         && code != ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND
-        && code != ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT
+        && code != ErrorCode::INVERTED_INDEX_BYPASS
         && code != ErrorCode::INVERTED_INDEX_NO_TERMS
         && code != ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED
         && code != ErrorCode::META_KEY_NOT_FOUND
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index a97eae836f..ad0deeafb2 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -73,6 +73,13 @@
 namespace doris {
 namespace segment_v2 {
 
+bool InvertedIndexReader::_is_range_query(InvertedIndexQueryType query_type) {
+    return (query_type == InvertedIndexQueryType::GREATER_THAN_QUERY ||
+            query_type == InvertedIndexQueryType::GREATER_EQUAL_QUERY ||
+            query_type == InvertedIndexQueryType::LESS_THAN_QUERY ||
+            query_type == InvertedIndexQueryType::LESS_EQUAL_QUERY);
+}
+
 bool InvertedIndexReader::_is_match_query(InvertedIndexQueryType query_type) {
     return (query_type == InvertedIndexQueryType::MATCH_ANY_QUERY ||
             query_type == InvertedIndexQueryType::MATCH_ALL_QUERY ||
@@ -491,8 +498,15 @@ Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats,
                                     result.add(docid);
                                 });
     } catch (const CLuceneError& e) {
-        LOG(WARNING) << "CLuceneError occured, error msg: " << e.what();
-        return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
+        if (_is_range_query(query_type) && e.number() == CL_ERR_TooManyClauses) {
+            LOG(WARNING) << "range query term exceeds limits, try to downgrade from inverted index,"
+                         << "column name:" << column_name << " search_str:" << search_str;
+            return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>();
+        } else {
+            LOG(WARNING) << "CLuceneError occured, error msg: " << e.what()
+                         << "column name:" << column_name << " search_str:" << search_str;
+            return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
+        }
     }
 
     // add to cache
@@ -875,7 +889,7 @@ Status InvertedIndexIterator::read_from_inverted_index(const std::string& column
         if (hit_count > segment_num_rows * query_bkd_limit_percent / 100) {
             LOG(INFO) << "hit count: " << hit_count << ", bkd inverted reached limit "
                       << query_bkd_limit_percent << "%, segment num rows: " << segment_num_rows;
-            return Status::Error<ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT>();
+            return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>();
         }
     }
 
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
index d68939fd5c..f9272d16fa 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
@@ -109,6 +109,7 @@ public:
                                                         InvertedIndexCtx* inverted_index_ctx);
 
 protected:
+    bool _is_range_query(InvertedIndexQueryType query_type);
     bool _is_match_query(InvertedIndexQueryType query_type);
     friend class InvertedIndexIterator;
     io::FileSystemSPtr _fs;
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 284ae71f1a..92717945b1 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -752,13 +752,13 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() {
 
 bool SegmentIterator::_downgrade_without_index(Status res, bool need_remaining) {
     if (res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND ||
-        res.code() == ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT ||
+        res.code() == ErrorCode::INVERTED_INDEX_BYPASS ||
         res.code() == ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED ||
         (res.code() == ErrorCode::INVERTED_INDEX_NO_TERMS && need_remaining)) {
         // 1. INVERTED_INDEX_FILE_NOT_FOUND means index file has not been built,
         //    usually occurs when creating a new index, queries can be downgraded
         //    without index.
-        // 2. INVERTED_INDEX_FILE_HIT_LIMIT means the hit of condition by index
+        // 2. INVERTED_INDEX_BYPASS means the hit of condition by index
         //    has reached the optimal limit, downgrade without index query can
         //    improve query performance.
         // 3. INVERTED_INDEX_EVALUATE_SKIPPED means the inverted index is not


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org