You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/11/16 16:04:20 UTC

[lucene] branch main updated: fix bug of incorrect cost after upgradeToBitSet in DocIdSetBuilder class (#11939)

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 3c5bcb383b5 fix bug of incorrect cost after upgradeToBitSet in DocIdSetBuilder class (#11939)
3c5bcb383b5 is described below

commit 3c5bcb383b5824e410bd3fb3bafd93dc88924cc5
Author: Mao Suhan <ma...@foxmail.com>
AuthorDate: Thu Nov 17 00:04:15 2022 +0800

    fix bug of incorrect cost after upgradeToBitSet in DocIdSetBuilder class (#11939)
---
 lucene/CHANGES.txt                                           |  3 +++
 .../src/java/org/apache/lucene/util/DocIdSetBuilder.java     |  4 ++--
 .../src/test/org/apache/lucene/util/TestDocIdSetBuilder.java | 12 ++++++++++++
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 324f2f924f0..9fc38a90e73 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -199,6 +199,9 @@ Bug Fixes
  This addresses a bug that was introduced in 9.2.0 where having many vectors is not handled well
  in the vector connections reader.
 
+* GITHUB#11939: Fix incorrect cost calculation in DocIdSetBuilder after upgradeToBitSet when the doc list is growing.
+  This addresses a bug where the cost of TermRangeQuery, TermInSetQuery and some other queries could be greatly underestimated.
+
 Improvements
 ---------------------
 * GITHUB#11912, GITHUB#11918: Port generic exception handling from MemorySegmentIndexInput
diff --git a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
index 67b3dde9f20..28128af05f6 100644
--- a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
@@ -162,12 +162,12 @@ public final class DocIdSetBuilder {
    * RoaringDocIdSet.Builder}.
    */
   public void add(DocIdSetIterator iter) throws IOException {
+    int cost = (int) Math.min(Integer.MAX_VALUE, iter.cost());
+    BulkAdder adder = grow(cost);
     if (bitSet != null) {
       bitSet.or(iter);
       return;
     }
-    int cost = (int) Math.min(Integer.MAX_VALUE, iter.cost());
-    BulkAdder adder = grow(cost);
     for (int i = 0; i < cost; ++i) {
       int doc = iter.nextDoc();
       if (doc == DocIdSetIterator.NO_MORE_DOCS) {
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
index 2fa146581c6..88dbf24e2d1 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
@@ -243,6 +243,18 @@ public class TestDocIdSetBuilder extends LuceneTestCase {
     assertTrue(builder.multivalued);
   }
 
+  public void testCostIsCorrectAfterBitsetUpgrade() throws IOException {
+    final int maxDoc = 1000000;
+    DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
+    // maxDoc >> 6 iterators exceeds DocIdSetBuilder.threshold (maxDoc >> 7), so the builder upgrades to a bit set
+    for (int i = 0; i < 1000000 >> 6; ++i) {
+      builder.add(DocIdSetIterator.range(i, i + 1));
+    }
+    DocIdSet result = builder.build();
+    assertTrue(result instanceof BitDocIdSet);
+    assertEquals(1000000 >> 6, result.iterator().cost());
+  }
+
   private static class DummyTerms extends Terms {
 
     private final int docCount;