You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by iv...@apache.org on 2021/12/07 06:42:04 UTC
[lucene] branch main updated: LUCENE-10289: Change DocIdSetBuilder#grow() from taking an int to a long (#520)
This is an automated email from the ASF dual-hosted git repository.
ivera pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new af1e68b LUCENE-10289: Change DocIdSetBuilder#grow() from taking an int to a long (#520)
af1e68b is described below
commit af1e68b89197bd6399c0db18e478716951dd381c
Author: Ignacio Vera <iv...@apache.org>
AuthorDate: Tue Dec 7 07:41:09 2021 +0100
LUCENE-10289: Change DocIdSetBuilder#grow() from taking an int to a long (#520)
---
lucene/CHANGES.txt | 2 ++
.../org/apache/lucene/util/DocIdSetBuilder.java | 29 ++++++++++++-----
.../apache/lucene/util/TestDocIdSetBuilder.java | 36 ++++++++++++++++++++--
3 files changed, 58 insertions(+), 9 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index fef136f..e935c4e 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -41,6 +41,8 @@ API Changes
* LUCENE-10244: MultiCollector::getCollectors is now public, allowing users to access the wrapped
collectors. (Andriy Redko)
+
+* LUCENE-10289: DocIdSetBuilder#grow() takes now a long instead of an int. (Ignacio Vera)
New Features
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
index 67b3dde..f006cb8 100644
--- a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
@@ -166,9 +166,9 @@ public final class DocIdSetBuilder {
bitSet.or(iter);
return;
}
- int cost = (int) Math.min(Integer.MAX_VALUE, iter.cost());
+ long cost = iter.cost();
BulkAdder adder = grow(cost);
- for (int i = 0; i < cost; ++i) {
+ for (long i = 0; i < cost; ++i) {
int doc = iter.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
return;
@@ -184,20 +184,35 @@ public final class DocIdSetBuilder {
* Reserve space and return a {@link BulkAdder} object that can be used to add up to {@code
* numDocs} documents.
*/
- public BulkAdder grow(int numDocs) {
+ public BulkAdder grow(long numDocs) {
if (bitSet == null) {
- if ((long) totalAllocated + numDocs <= threshold) {
- ensureBufferCapacity(numDocs);
+ if ((long) totalAllocated + checkTotalAllocatedOverflow(numDocs) <= threshold) {
+ // For extra safety we use toIntExact
+ ensureBufferCapacity(Math.toIntExact(numDocs));
} else {
upgradeToBitSet();
- counter += numDocs;
+ counter += checkCounterOverflow(numDocs);
}
} else {
- counter += numDocs;
+ counter += checkCounterOverflow(numDocs);
}
return adder;
}
+ private long checkTotalAllocatedOverflow(long numDocs) {
+ if ((long) totalAllocated + numDocs < totalAllocated) {
+ throw new ArithmeticException("long overflow");
+ }
+ return numDocs;
+ }
+
+ private long checkCounterOverflow(long numDocs) {
+ if (counter + numDocs < counter) {
+ throw new ArithmeticException("long overflow");
+ }
+ return numDocs;
+ }
+
private void ensureBufferCapacity(int numDocs) {
if (buffers.isEmpty()) {
addBuffer(additionalCapacity(numDocs));
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
index 1c7171a..82da3bb 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java
@@ -128,9 +128,9 @@ public class TestDocIdSetBuilder extends LuceneTestCase {
for (j = 0; j < array.length; ) {
final int l = TestUtil.nextInt(random(), 1, array.length - j);
DocIdSetBuilder.BulkAdder adder = null;
- for (int k = 0, budget = 0; k < l; ++k) {
+ for (long k = 0, budget = 0; k < l; ++k) {
if (budget == 0 || rarely()) {
- budget = TestUtil.nextInt(random(), 1, l - k + 5);
+ budget = TestUtil.nextLong(random(), 1, l - k + 5);
adder = builder.grow(budget);
}
adder.add(array[j++]);
@@ -241,6 +241,38 @@ public class TestDocIdSetBuilder extends LuceneTestCase {
assertTrue(builder.multivalued);
}
+ @Nightly
+ public void testLotsOfDocs() throws IOException {
+ final int docCount = 1;
+ final long numDocs = (long) Integer.MAX_VALUE + 1;
+ PointValues values = new DummyPointValues(docCount, numDocs);
+ DocIdSetBuilder builder = new DocIdSetBuilder(100, values, "foo");
+ DocIdSetBuilder.BulkAdder adder = builder.grow(numDocs);
+ for (long i = 0; i < numDocs; ++i) {
+ adder.add(0);
+ }
+ DocIdSet result = builder.build();
+ assertTrue(result instanceof BitDocIdSet);
+ assertEquals(1, result.iterator().cost());
+ }
+
+ public void testLongOverflow() throws IOException {
+ {
+ DocIdSetBuilder builder = new DocIdSetBuilder(100);
+ builder.grow(1L);
+ Exception ex = expectThrows(ArithmeticException.class, () -> builder.grow(Long.MAX_VALUE));
+ assertEquals("long overflow", ex.getMessage());
+ }
+ {
+ DocIdSetBuilder builder = new DocIdSetBuilder(100);
+ builder.grow((long) Integer.MAX_VALUE + 1);
+ Exception ex =
+ expectThrows(
+ ArithmeticException.class, () -> builder.grow(Long.MAX_VALUE - Integer.MAX_VALUE));
+ assertEquals("long overflow", ex.getMessage());
+ }
+ }
+
private static class DummyTerms extends Terms {
private final int docCount;