You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dn...@apache.org on 2021/09/28 19:26:38 UTC
[lucene] branch main updated: LUCENE-10126: Fix competitiveIterator
wrongly skip documents (#324)
This is an automated email from the ASF dual-hosted git repository.
dnhatn pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 5ab900e LUCENE-10126: Fix competitiveIterator wrongly skip documents (#324)
5ab900e is described below
commit 5ab900e10b7eab8de13c45ce2a4edec5f435aba3
Author: Nhat Nguyen <nh...@elastic.co>
AuthorDate: Tue Sep 28 15:26:30 2021 -0400
LUCENE-10126: Fix competitiveIterator wrongly skip documents (#324)
The competitive iterator can wrongly skip a document that the iterators
had already advanced to, but that was not collected, in the previous scoreRange call.
---
.../src/java/org/apache/lucene/search/Weight.java | 27 +++++++++++++---------
.../lucene/search/comparators/DocComparator.java | 2 +-
.../search/comparators/NumericComparator.java | 2 +-
.../apache/lucene/search/TestSortOptimization.java | 16 ++++++-------
.../apache/lucene/search/AssertingBulkScorer.java | 18 +++++++++++++--
5 files changed, 42 insertions(+), 23 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java
index 2975435..9866e54 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Weight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java
@@ -225,18 +225,23 @@ public abstract class Weight implements SegmentCacheable {
throws IOException {
collector.setScorer(scorer);
DocIdSetIterator scorerIterator = twoPhase == null ? iterator : twoPhase.approximation();
- DocIdSetIterator collectorIterator = collector.competitiveIterator();
+ DocIdSetIterator competitiveIterator = collector.competitiveIterator();
DocIdSetIterator filteredIterator;
- if (collectorIterator == null) {
+ if (competitiveIterator == null) {
filteredIterator = scorerIterator;
} else {
+ // Wrap competitiveIterator and scorerIterator so that each starts with (i.e., on calling
+ // nextDoc()) the last visited docID, because ConjunctionDISI might have advanced to that doc
+ // in the previous scoreRange call without processing it, due to the range limit of scoreRange.
if (scorerIterator.docID() != -1) {
- // Wrap ScorerIterator to start from -1 for conjunction
scorerIterator = new StartDISIWrapper(scorerIterator);
}
+ if (competitiveIterator.docID() != -1) {
+ competitiveIterator = new StartDISIWrapper(competitiveIterator);
+ }
// filter scorerIterator to keep only competitive docs as defined by collector
filteredIterator =
- ConjunctionUtils.intersectIterators(Arrays.asList(scorerIterator, collectorIterator));
+ ConjunctionUtils.intersectIterators(Arrays.asList(scorerIterator, competitiveIterator));
}
if (filteredIterator.docID() == -1 && min == 0 && max == DocIdSetIterator.NO_MORE_DOCS) {
scoreAll(collector, filteredIterator, twoPhase, acceptDocs);
@@ -314,15 +319,15 @@ public abstract class Weight implements SegmentCacheable {
}
}
- /** Wraps an internal docIdSetIterator for it to start with docID = -1 */
- protected static class StartDISIWrapper extends DocIdSetIterator {
+ /** Wraps an internal DocIdSetIterator so that it starts with the last visited docID */
+ private static class StartDISIWrapper extends DocIdSetIterator {
private final DocIdSetIterator in;
- private final int min;
+ private final int startDocID;
private int docID = -1;
- public StartDISIWrapper(DocIdSetIterator in) {
+ StartDISIWrapper(DocIdSetIterator in) {
this.in = in;
- this.min = in.docID();
+ this.startDocID = in.docID();
}
@Override
@@ -337,8 +342,8 @@ public abstract class Weight implements SegmentCacheable {
@Override
public int advance(int target) throws IOException {
- if (target <= min) {
- return docID = min;
+ if (target <= startDocID) {
+ return docID = startDocID;
}
return docID = in.advance(target);
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/DocComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/DocComparator.java
index 51bc049..7b8845b 100644
--- a/lucene/core/src/java/org/apache/lucene/search/comparators/DocComparator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/comparators/DocComparator.java
@@ -131,7 +131,7 @@ public class DocComparator extends FieldComparator<Integer> {
return null;
} else {
return new DocIdSetIterator() {
- private int docID = -1;
+ private int docID = competitiveIterator.docID();
@Override
public int nextDoc() throws IOException {
diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java
index 051d9cc..16eecf7 100644
--- a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java
@@ -273,7 +273,7 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
public DocIdSetIterator competitiveIterator() {
if (enableSkipping == false) return null;
return new DocIdSetIterator() {
- private int docID = -1;
+ private int docID = competitiveIterator.docID();
@Override
public int nextDoc() throws IOException {
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
index 3ab679d..379241e 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
@@ -58,7 +58,7 @@ public class TestSortOptimization extends LuceneTestCase {
}
final IndexReader reader = DirectoryReader.open(writer);
writer.close();
- IndexSearcher searcher = new IndexSearcher(reader);
+ IndexSearcher searcher = newSearcher(reader);
final SortField sortField = new SortField("my_field", SortField.Type.LONG);
final Sort sort = new Sort(sortField);
final int numHits = 3;
@@ -143,7 +143,7 @@ public class TestSortOptimization extends LuceneTestCase {
}
final IndexReader reader = DirectoryReader.open(writer);
writer.close();
- IndexSearcher searcher = new IndexSearcher(reader);
+ IndexSearcher searcher = newSearcher(reader);
final SortField sortField = new SortField("my_field", SortField.Type.LONG);
final Sort sort = new Sort(sortField);
final int numHits = 3;
@@ -182,7 +182,7 @@ public class TestSortOptimization extends LuceneTestCase {
}
final IndexReader reader = DirectoryReader.open(writer);
writer.close();
- IndexSearcher searcher = new IndexSearcher(reader);
+ IndexSearcher searcher = newSearcher(reader);
final int numHits = 3;
final int totalHitsThreshold = 3;
@@ -220,7 +220,7 @@ public class TestSortOptimization extends LuceneTestCase {
public void testSortOptimizationEqualValues() throws IOException {
final Directory dir = newDirectory();
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
- final int numDocs = atLeast(10000);
+ final int numDocs = atLeast(TEST_NIGHTLY ? 50_000 : 10_000);
for (int i = 1; i <= numDocs; ++i) {
final Document doc = new Document();
doc.add(
@@ -234,7 +234,7 @@ public class TestSortOptimization extends LuceneTestCase {
}
final IndexReader reader = DirectoryReader.open(writer);
writer.close();
- IndexSearcher searcher = new IndexSearcher(reader);
+ IndexSearcher searcher = newSearcher(reader);
final int numHits = 3;
final int totalHitsThreshold = 3;
@@ -313,7 +313,7 @@ public class TestSortOptimization extends LuceneTestCase {
}
final IndexReader reader = DirectoryReader.open(writer);
writer.close();
- IndexSearcher searcher = new IndexSearcher(reader);
+ IndexSearcher searcher = newSearcher(reader);
final SortField sortField = new SortField("my_field", SortField.Type.FLOAT);
final Sort sort = new Sort(sortField);
final int numHits = 3;
@@ -661,7 +661,7 @@ public class TestSortOptimization extends LuceneTestCase {
}
IndexReader reader = DirectoryReader.open(writer);
writer.close();
- IndexSearcher searcher = new IndexSearcher(reader);
+ IndexSearcher searcher = newSearcher(reader);
SortField sortField = new SortField("my_field", SortField.Type.LONG);
TopFieldDocs topDocs =
searcher.search(new MatchAllDocsQuery(), 1 + random().nextInt(100), new Sort(sortField));
@@ -706,7 +706,7 @@ public class TestSortOptimization extends LuceneTestCase {
seqNos.sort(Long::compare);
IndexReader reader = DirectoryReader.open(writer);
writer.close();
- IndexSearcher searcher = new IndexSearcher(reader);
+ IndexSearcher searcher = newSearcher(reader);
SortField sortField = new SortField("seq_no", SortField.Type.LONG);
int visitedHits = 0;
ScoreDoc after = null;
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkScorer.java
index 2e7cc61..058661b 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkScorer.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkScorer.java
@@ -80,8 +80,22 @@ final class AssertingBulkScorer extends BulkScorer {
assert min <= max : "max must be greater than min, got min=" + min + ", and max=" + max;
this.max = max;
collector = new AssertingLeafCollector(collector, min, max);
- final int next = in.score(collector, acceptDocs, min, max);
- assert next >= max;
+ int next = min;
+ while (next < max) {
+ final int upTo;
+ if (random.nextBoolean()) {
+ upTo = max;
+ } else {
+ final int interval;
+ if (random.nextInt(100) <= 5) {
+ interval = 1 + random.nextInt(10);
+ } else {
+ interval = 1 + random.nextInt(random.nextBoolean() ? 100 : 5000);
+ }
+ upTo = Math.toIntExact(Math.min(min + interval, max));
+ }
+ next = in.score(collector, acceptDocs, min, upTo);
+ }
if (max >= maxDoc || next >= maxDoc) {
assert next == DocIdSetIterator.NO_MORE_DOCS;
return DocIdSetIterator.NO_MORE_DOCS;