You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2023/01/12 17:39:43 UTC
[lucene] 02/02: Speed up DocIdMerger on sorted indexes. (#12081)
This is an automated email from the ASF dual-hosted git repository.
jpountz pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
commit 5f0d22073446d174a37f80a6d2265b36f4bf247c
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Thu Jan 12 18:27:45 2023 +0100
Speed up DocIdMerger on sorted indexes. (#12081)
In the case when an index is sorted on a low-cardinality field, or the index
sort order correlates with the order in which documents get ingested, we can
optimize `SortedDocIDMerger` by doing a single comparison with the doc ID on
the next sub. This checks covers at the same time whether the priority queue
needs reordering and whether the current sub reached `NO_MORE_DOCS`.
---
lucene/CHANGES.txt | 2 ++
.../java/org/apache/lucene/index/DocIDMerger.java | 21 ++++++++++++++++++++-
2 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 97f1e833019..5dd8547bf58 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -184,6 +184,8 @@ Optimizations
* GITHUB#12079: Faster merging of 1D points. (Adrien Grand)
+* GITHUB#12081: Small merging speedup on sorted indexes. (Adrien Grand)
+
Other
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
index f9accb710f1..dd1840ca850 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocIDMerger.java
@@ -137,6 +137,7 @@ public abstract class DocIDMerger<T extends DocIDMerger.Sub> {
private final List<T> subs;
private T current;
private final PriorityQueue<T> queue;
+ private int queueMinDocID;
private SortedDocIDMerger(List<T> subs, int maxCount) throws IOException {
if (maxCount <= 1) {
@@ -154,6 +155,14 @@ public abstract class DocIDMerger<T extends DocIDMerger.Sub> {
reset();
}
+ private void setQueueMinDocID() {
+ if (queue.size() > 0) {
+ queueMinDocID = queue.top().mappedDocID;
+ } else {
+ queueMinDocID = DocIdSetIterator.NO_MORE_DOCS;
+ }
+ }
+
@Override
public void reset() throws IOException {
// caller may not have fully consumed the queue:
@@ -171,23 +180,33 @@ public abstract class DocIDMerger<T extends DocIDMerger.Sub> {
queue.add(sub);
} // else all docs in this sub were deleted; do not add it to the queue!
}
+ setQueueMinDocID();
}
@Override
public T next() throws IOException {
int nextDoc = current.nextMappedDoc();
+ if (nextDoc < queueMinDocID) {
+ // This should be the common case when index sorting is either disabled, or enabled on a
+ // low-cardinality field, or enabled on a field that correlates with index order.
+ return current;
+ }
+
if (nextDoc == NO_MORE_DOCS) {
if (queue.size() == 0) {
current = null;
} else {
current = queue.pop();
}
- } else if (queue.size() > 0 && nextDoc > queue.top().mappedDocID) {
+ } else if (queue.size() > 0) {
+ assert queueMinDocID == queue.top().mappedDocID;
+ assert nextDoc > queueMinDocID;
T newCurrent = queue.top();
queue.updateTop(current);
current = newCurrent;
}
+ setQueueMinDocID();
return current;
}
}