You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/09/15 09:21:46 UTC
[lucene] branch main updated: LUCENE-10674: Ensure BitSetConjDISI returns NO_MORE_DOCS when sub-iterator exhausts. (#1068)
This is an automated email from the ASF dual-hosted git repository.
jpountz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 0587844742a LUCENE-10674: Ensure BitSetConjDISI returns NO_MORE_DOCS when sub-iterator exhausts. (#1068)
0587844742a is described below
commit 0587844742ab5da02469bf9aa544a3f1b40e5089
Author: John Mazanec <jm...@amazon.com>
AuthorDate: Thu Sep 15 02:21:39 2022 -0700
LUCENE-10674: Ensure BitSetConjDISI returns NO_MORE_DOCS when sub-iterator exhausts. (#1068)
Signed-off-by: John Mazanec <jm...@amazon.com>
---
lucene/CHANGES.txt | 3 ++-
.../org/apache/lucene/search/ConjunctionDISI.java | 3 +++
.../apache/lucene/search/TestConjunctionDISI.java | 31 +++++++++++++++++++---
3 files changed, 33 insertions(+), 4 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4da55591250..d717c44bfa2 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -92,11 +92,12 @@ API Changes
Bug Fixes
---------------------
-
* GITHUB#11726: Indexing term vectors on large documents could fail due to
trying to apply a dictionary whose size is greater than the maximum supported
window size for LZ4. (Adrien Grand)
+* LUCENE-10674: Ensure BitSetConjDISI returns NO_MORE_DOCS when sub-iterator exhausts. (Jack Mazanec)
+
Other
---------------------
* LUCENE-10423: Remove usages of System.currentTimeMillis() from tests. (Marios Trivyzas)
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
index e00bc752394..b70224f1ec5 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
@@ -281,6 +281,9 @@ final class ConjunctionDISI extends DocIdSetIterator {
advanceLead:
for (; ; doc = lead.nextDoc()) {
if (doc >= minLength) {
+ if (doc != NO_MORE_DOCS) {
+ lead.advance(NO_MORE_DOCS);
+ }
return NO_MORE_DOCS;
}
for (BitSet bitSet : bitSets) {
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
index 436982ffc59..429c84c891a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
@@ -16,6 +16,8 @@
*/
package org.apache.lucene.search;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
@@ -213,9 +215,7 @@ public class TestConjunctionDISI extends LuceneTestCase {
private static FixedBitSet toBitSet(int maxDoc, DocIdSetIterator iterator) throws IOException {
final FixedBitSet set = new FixedBitSet(maxDoc);
- for (int doc = iterator.nextDoc();
- doc != DocIdSetIterator.NO_MORE_DOCS;
- doc = iterator.nextDoc()) {
+ for (int doc = iterator.nextDoc(); doc != NO_MORE_DOCS; doc = iterator.nextDoc()) {
set.set(doc);
}
return set;
@@ -459,4 +459,29 @@ public class TestConjunctionDISI extends LuceneTestCase {
AssertionError ex = expectThrows(AssertionError.class, () -> conjunction.nextDoc());
assertEquals("Sub-iterators of ConjunctionDISI are not on the same document!", ex.getMessage());
}
+
+ public void testBitSetConjunctionDISIDocIDOnExhaust() throws IOException {
+ int numBitSetIterators = TestUtil.nextInt(random(), 2, 5);
+ DocIdSetIterator[] iterators = new DocIdSetIterator[numBitSetIterators + 1];
+
+ // Create a sparse DocIdSetIterator whose single match is greater than the length of every
+ // bitset iterator
+ int maxBitSetLength = 1000;
+ int minBitSetLength = 2;
+ int leadMaxDoc = maxBitSetLength + 1;
+ iterators[iterators.length - 1] = DocIdSetIterator.range(leadMaxDoc, leadMaxDoc + 1);
+
+ for (int i = 0; i < numBitSetIterators; i++) {
+ int bitSetLength = TestUtil.nextInt(random(), minBitSetLength, maxBitSetLength);
+ FixedBitSet bitSet = new FixedBitSet(bitSetLength);
+ bitSet.set(0, bitSetLength - 1);
+ iterators[i] = new BitDocIdSet(bitSet).iterator();
+ }
+
+ final DocIdSetIterator conjunction =
+ ConjunctionUtils.intersectIterators(Arrays.asList(iterators));
+
+ assertEquals(NO_MORE_DOCS, conjunction.nextDoc());
+ assertEquals(NO_MORE_DOCS, conjunction.docID());
+ }
}