You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/11/16 16:04:20 UTC
[lucene] branch main updated: fix bug of incorrect cost after upgradeToBitSet in DocIdSetBuilder class (#11939)
This is an automated email from the ASF dual-hosted git repository.
jpountz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 3c5bcb383b5 fix bug of incorrect cost after upgradeToBitSet in DocIdSetBuilder class (#11939)
3c5bcb383b5 is described below
commit 3c5bcb383b5824e410bd3fb3bafd93dc88924cc5
Author: Mao Suhan <ma...@foxmail.com>
AuthorDate: Thu Nov 17 00:04:15 2022 +0800
fix bug of incorrect cost after upgradeToBitSet in DocIdSetBuilder class (#11939)
---
lucene/CHANGES.txt | 3 +++
.../src/java/org/apache/lucene/util/DocIdSetBuilder.java | 4 ++--
.../src/test/org/apache/lucene/util/TestDocIdSetBuilder.java | 12 ++++++++++++
3 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 324f2f924f0..9fc38a90e73 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -199,6 +199,9 @@ Bug Fixes
This addresses a bug that was introduced in 9.2.0 where having many vectors is not handled well
in the vector connections reader.
+* GITHUB#11939: Fix incorrect cost calculation in DocIdSetBuilder after upgradeToBitSet when the doc list is growing.
+ This addresses a bug where the cost of TermRangeQuery/TermInSetQuery and some other queries was greatly underestimated.
+
Improvements
---------------------
* GITHUB#11912, GITHUB#11918: Port generic exception handling from MemorySegmentIndexInput
diff --git a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
index 67b3dde9f20..28128af05f6 100644
--- a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
@@ -162,12 +162,12 @@ public final class DocIdSetBuilder {
* RoaringDocIdSet.Builder}.
*/
public void add(DocIdSetIterator iter) throws IOException {
+ int cost = (int) Math.min(Integer.MAX_VALUE, iter.cost());
+ BulkAdder adder = grow(cost);
if (bitSet != null) {
bitSet.or(iter);
return;
}
- int cost = (int) Math.min(Integer.MAX_VALUE, iter.cost());
- BulkAdder adder = grow(cost);
for (int i = 0; i < cost; ++i) {
int doc = iter.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
index 2fa146581c6..88dbf24e2d1 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
@@ -243,6 +243,18 @@ public class TestDocIdSetBuilder extends LuceneTestCase {
assertTrue(builder.multivalued);
}
+ public void testCostIsCorrectAfterBitsetUpgrade() throws IOException {
+ final int maxDoc = 1000000;
+ DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
+ // 1000000 >> 6 is greater than DocIdSetBuilder.threshold which is 1000000 >> 7
+ for (int i = 0; i < 1000000 >> 6; ++i) {
+ builder.add(DocIdSetIterator.range(i, i + 1));
+ }
+ DocIdSet result = builder.build();
+ assertTrue(result instanceof BitDocIdSet);
+ assertEquals(1000000 >> 6, result.iterator().cost());
+ }
+
private static class DummyTerms extends Terms {
private final int docCount;