You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by kx...@apache.org on 2023/06/07 16:13:03 UTC
[doris] 09/13: [Fix](inverted index) if range query exceeds CLucene limits, downgrade it from inverted index (#20528)
This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0-beta
in repository https://gitbox.apache.org/repos/asf/doris.git
commit c6a2bc900d66350664ca3bf377d65ecfd3e5160e
Author: airborne12 <ai...@gmail.com>
AuthorDate: Wed Jun 7 20:07:48 2023 +0800
[Fix](inverted index) if range query exceeds CLucene limits, downgrade it from inverted index (#20528)
CLucene may throw CL_ERR_TooManyClauses when a range query matches too many terms.
In that situation, the query has to be downgraded so that it is evaluated without the inverted index.
---
be/src/common/status.h | 4 ++--
.../olap/rowset/segment_v2/inverted_index_reader.cpp | 20 +++++++++++++++++---
.../olap/rowset/segment_v2/inverted_index_reader.h | 1 +
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 4 ++--
4 files changed, 22 insertions(+), 7 deletions(-)
diff --git a/be/src/common/status.h b/be/src/common/status.h
index ba634df13c..146b4f34f7 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -262,7 +262,7 @@ E(INVERTED_INDEX_INVALID_PARAMETERS, -6000);
E(INVERTED_INDEX_NOT_SUPPORTED, -6001);
E(INVERTED_INDEX_CLUCENE_ERROR, -6002);
E(INVERTED_INDEX_FILE_NOT_FOUND, -6003);
-E(INVERTED_INDEX_FILE_HIT_LIMIT, -6004);
+E(INVERTED_INDEX_BYPASS, -6004);
E(INVERTED_INDEX_NO_TERMS, -6005);
E(INVERTED_INDEX_RENAME_FILE_FAILED, -6006);
E(INVERTED_INDEX_EVALUATE_SKIPPED, -6007);
@@ -293,7 +293,7 @@ constexpr bool capture_stacktrace() {
&& code != ErrorCode::INVERTED_INDEX_NOT_SUPPORTED
&& code != ErrorCode::INVERTED_INDEX_CLUCENE_ERROR
&& code != ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND
- && code != ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT
+ && code != ErrorCode::INVERTED_INDEX_BYPASS
&& code != ErrorCode::INVERTED_INDEX_NO_TERMS
&& code != ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED
&& code != ErrorCode::META_KEY_NOT_FOUND
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index a97eae836f..ad0deeafb2 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -73,6 +73,13 @@
namespace doris {
namespace segment_v2 {
+bool InvertedIndexReader::_is_range_query(InvertedIndexQueryType query_type) {
+ return (query_type == InvertedIndexQueryType::GREATER_THAN_QUERY ||
+ query_type == InvertedIndexQueryType::GREATER_EQUAL_QUERY ||
+ query_type == InvertedIndexQueryType::LESS_THAN_QUERY ||
+ query_type == InvertedIndexQueryType::LESS_EQUAL_QUERY);
+}
+
bool InvertedIndexReader::_is_match_query(InvertedIndexQueryType query_type) {
return (query_type == InvertedIndexQueryType::MATCH_ANY_QUERY ||
query_type == InvertedIndexQueryType::MATCH_ALL_QUERY ||
@@ -491,8 +498,15 @@ Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats,
result.add(docid);
});
} catch (const CLuceneError& e) {
- LOG(WARNING) << "CLuceneError occured, error msg: " << e.what();
- return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
+ if (_is_range_query(query_type) && e.number() == CL_ERR_TooManyClauses) {
+ LOG(WARNING) << "range query term exceeds limits, try to downgrade from inverted index,"
+ << "column name:" << column_name << " search_str:" << search_str;
+ return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>();
+ } else {
+ LOG(WARNING) << "CLuceneError occured, error msg: " << e.what()
+ << "column name:" << column_name << " search_str:" << search_str;
+ return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
+ }
}
// add to cache
@@ -875,7 +889,7 @@ Status InvertedIndexIterator::read_from_inverted_index(const std::string& column
if (hit_count > segment_num_rows * query_bkd_limit_percent / 100) {
LOG(INFO) << "hit count: " << hit_count << ", bkd inverted reached limit "
<< query_bkd_limit_percent << "%, segment num rows: " << segment_num_rows;
- return Status::Error<ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT>();
+ return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>();
}
}
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
index d68939fd5c..f9272d16fa 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
@@ -109,6 +109,7 @@ public:
InvertedIndexCtx* inverted_index_ctx);
protected:
+ bool _is_range_query(InvertedIndexQueryType query_type);
bool _is_match_query(InvertedIndexQueryType query_type);
friend class InvertedIndexIterator;
io::FileSystemSPtr _fs;
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 284ae71f1a..92717945b1 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -752,13 +752,13 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() {
bool SegmentIterator::_downgrade_without_index(Status res, bool need_remaining) {
if (res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND ||
- res.code() == ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT ||
+ res.code() == ErrorCode::INVERTED_INDEX_BYPASS ||
res.code() == ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED ||
(res.code() == ErrorCode::INVERTED_INDEX_NO_TERMS && need_remaining)) {
// 1. INVERTED_INDEX_FILE_NOT_FOUND means index file has not been built,
// usually occurs when creating a new index, queries can be downgraded
// without index.
- // 2. INVERTED_INDEX_FILE_HIT_LIMIT means the hit of condition by index
+ // 2. INVERTED_INDEX_BYPASS means the hit of condition by index
// has reached the optimal limit, downgrade without index query can
// improve query performance.
// 3. INVERTED_INDEX_EVALUATE_SKIPPED means the inverted index is not
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org