You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by iv...@apache.org on 2021/12/07 06:42:04 UTC

[lucene] branch main updated: LUCENE-10289: Change DocIdSetBuilder#grow() from taking an int to a long (#520)

This is an automated email from the ASF dual-hosted git repository.

ivera pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new af1e68b  LUCENE-10289: Change DocIdSetBuilder#grow() from taking an int to a long (#520)
af1e68b is described below

commit af1e68b89197bd6399c0db18e478716951dd381c
Author: Ignacio Vera <iv...@apache.org>
AuthorDate: Tue Dec 7 07:41:09 2021 +0100

    LUCENE-10289: Change DocIdSetBuilder#grow() from taking an int to a long (#520)
---
 lucene/CHANGES.txt                                 |  2 ++
 .../org/apache/lucene/util/DocIdSetBuilder.java    | 29 ++++++++++++-----
 .../apache/lucene/util/TestDocIdSetBuilder.java    | 36 ++++++++++++++++++++--
 3 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index fef136f..e935c4e 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -41,6 +41,8 @@ API Changes
 
 * LUCENE-10244: MultiCollector::getCollectors is now public, allowing users to access the wrapped
   collectors. (Andriy Redko)
+  
+* LUCENE-10289: DocIdSetBuilder#grow() takes now a long instead of an int. (Ignacio Vera)  
 
 New Features
 ---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
index 67b3dde..f006cb8 100644
--- a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
@@ -166,9 +166,9 @@ public final class DocIdSetBuilder {
       bitSet.or(iter);
       return;
     }
-    int cost = (int) Math.min(Integer.MAX_VALUE, iter.cost());
+    long cost = iter.cost();
     BulkAdder adder = grow(cost);
-    for (int i = 0; i < cost; ++i) {
+    for (long i = 0; i < cost; ++i) {
       int doc = iter.nextDoc();
       if (doc == DocIdSetIterator.NO_MORE_DOCS) {
         return;
@@ -184,20 +184,35 @@ public final class DocIdSetBuilder {
    * Reserve space and return a {@link BulkAdder} object that can be used to add up to {@code
    * numDocs} documents.
    */
-  public BulkAdder grow(int numDocs) {
+  public BulkAdder grow(long numDocs) {
     if (bitSet == null) {
-      if ((long) totalAllocated + numDocs <= threshold) {
-        ensureBufferCapacity(numDocs);
+      if ((long) totalAllocated + checkTotalAllocatedOverflow(numDocs) <= threshold) {
+        // For extra safety we use toIntExact
+        ensureBufferCapacity(Math.toIntExact(numDocs));
       } else {
         upgradeToBitSet();
-        counter += numDocs;
+        counter += checkCounterOverflow(numDocs);
       }
     } else {
-      counter += numDocs;
+      counter += checkCounterOverflow(numDocs);
     }
     return adder;
   }
 
+  private long checkTotalAllocatedOverflow(long numDocs) {
+    if ((long) totalAllocated + numDocs < totalAllocated) {
+      throw new ArithmeticException("long overflow");
+    }
+    return numDocs;
+  }
+
+  private long checkCounterOverflow(long numDocs) {
+    if (counter + numDocs < counter) {
+      throw new ArithmeticException("long overflow");
+    }
+    return numDocs;
+  }
+
   private void ensureBufferCapacity(int numDocs) {
     if (buffers.isEmpty()) {
       addBuffer(additionalCapacity(numDocs));
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
index 1c7171a..82da3bb 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
@@ -128,9 +128,9 @@ public class TestDocIdSetBuilder extends LuceneTestCase {
       for (j = 0; j < array.length; ) {
         final int l = TestUtil.nextInt(random(), 1, array.length - j);
         DocIdSetBuilder.BulkAdder adder = null;
-        for (int k = 0, budget = 0; k < l; ++k) {
+        for (long k = 0, budget = 0; k < l; ++k) {
           if (budget == 0 || rarely()) {
-            budget = TestUtil.nextInt(random(), 1, l - k + 5);
+            budget = TestUtil.nextLong(random(), 1, l - k + 5);
             adder = builder.grow(budget);
           }
           adder.add(array[j++]);
@@ -241,6 +241,38 @@ public class TestDocIdSetBuilder extends LuceneTestCase {
     assertTrue(builder.multivalued);
   }
 
+  @Nightly
+  public void testLotsOfDocs() throws IOException {
+    final int docCount = 1;
+    final long numDocs = (long) Integer.MAX_VALUE + 1;
+    PointValues values = new DummyPointValues(docCount, numDocs);
+    DocIdSetBuilder builder = new DocIdSetBuilder(100, values, "foo");
+    DocIdSetBuilder.BulkAdder adder = builder.grow(numDocs);
+    for (long i = 0; i < numDocs; ++i) {
+      adder.add(0);
+    }
+    DocIdSet result = builder.build();
+    assertTrue(result instanceof BitDocIdSet);
+    assertEquals(1, result.iterator().cost());
+  }
+
+  public void testLongOverflow() throws IOException {
+    {
+      DocIdSetBuilder builder = new DocIdSetBuilder(100);
+      builder.grow(1L);
+      Exception ex = expectThrows(ArithmeticException.class, () -> builder.grow(Long.MAX_VALUE));
+      assertEquals("long overflow", ex.getMessage());
+    }
+    {
+      DocIdSetBuilder builder = new DocIdSetBuilder(100);
+      builder.grow((long) Integer.MAX_VALUE + 1);
+      Exception ex =
+          expectThrows(
+              ArithmeticException.class, () -> builder.grow(Long.MAX_VALUE - Integer.MAX_VALUE));
+      assertEquals("long overflow", ex.getMessage());
+    }
+  }
+
   private static class DummyTerms extends Terms {
 
     private final int docCount;