You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ji...@apache.org on 2021/11/18 17:08:45 UTC
[lucene] branch branch_9_0 updated: LUCENE-10208: Ensure that the minimum competitive score does not decrease in concurrent search (#431)
This is an automated email from the ASF dual-hosted git repository.
jimczi pushed a commit to branch branch_9_0
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9_0 by this push:
new 0018567 LUCENE-10208: Ensure that the minimum competitive score does not decrease in concurrent search (#431)
0018567 is described below
commit 0018567e8b16f3ba2f88a912afdc6367bb3ce675
Author: Jim Ferenczi <ji...@elastic.co>
AuthorDate: Tue Nov 9 11:04:17 2021 +0100
LUCENE-10208: Ensure that the minimum competitive score does not decrease in concurrent search (#431)
Co-authored-by: Adrien Grand <jp...@gmail.com>
---
lucene/CHANGES.txt | 2 +
.../apache/lucene/search/MaxScoreAccumulator.java | 50 ++++++++++++++------
.../apache/lucene/search/TopFieldCollector.java | 12 +++--
.../apache/lucene/search/TopScoreDocCollector.java | 24 +++++++---
.../lucene/search/TestMaxScoreAccumulator.java | 22 ++++++---
.../apache/lucene/search/TestTopDocsCollector.java | 42 ++++++-----------
.../lucene/search/TestTopFieldCollector.java | 53 ++++++----------------
7 files changed, 108 insertions(+), 97 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 1c6d089..f4a5557 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -496,6 +496,8 @@ Bug Fixes
* LUCENE-10154: NumericLeafComparator to define getPointValues. (Mayya Sharipova, Adrien Grand)
+* LUCENE-10208: Ensure that the minimum competitive score does not decrease in concurrent search. (Jim Ferenczi, Adrien Grand)
+
Build
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java
index fa66238..b0f60f8 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java
@@ -26,7 +26,7 @@ final class MaxScoreAccumulator {
static final int DEFAULT_INTERVAL = 0x3ff;
// scores are always positive
- final LongAccumulator acc = new LongAccumulator(Long::max, Long.MIN_VALUE);
+ final LongAccumulator acc = new LongAccumulator(MaxScoreAccumulator::maxEncode, Long.MIN_VALUE);
// non-final and visible for tests
long modInterval;
@@ -35,9 +35,26 @@ final class MaxScoreAccumulator {
this.modInterval = DEFAULT_INTERVAL;
}
- void accumulate(int docID, float score) {
- assert docID >= 0 && score >= 0;
- long encode = (((long) Float.floatToIntBits(score)) << 32) | docID;
+ /**
+ * Return the max encoded DocAndScore in a way that is consistent with {@link
+ * DocAndScore#compareTo}.
+ */
+ private static long maxEncode(long v1, long v2) {
+ float score1 = Float.intBitsToFloat((int) (v1 >> 32));
+ float score2 = Float.intBitsToFloat((int) (v2 >> 32));
+ int cmp = Float.compare(score1, score2);
+ if (cmp == 0) {
+ // tie-break on the minimum doc base
+ return (int) v1 < (int) v2 ? v1 : v2;
+ } else if (cmp > 0) {
+ return v1;
+ }
+ return v2;
+ }
+
+ void accumulate(int docBase, float score) {
+ assert docBase >= 0 && score >= 0;
+ long encode = (((long) Float.floatToIntBits(score)) << 32) | docBase;
acc.accumulate(encode);
}
@@ -47,16 +64,16 @@ final class MaxScoreAccumulator {
return null;
}
float score = Float.intBitsToFloat((int) (value >> 32));
- int docID = (int) value;
- return new DocAndScore(docID, score);
+ int docBase = (int) value;
+ return new DocAndScore(docBase, score);
}
static class DocAndScore implements Comparable<DocAndScore> {
- final int docID;
+ final int docBase;
final float score;
- DocAndScore(int docID, float score) {
- this.docID = docID;
+ DocAndScore(int docBase, float score) {
+ this.docBase = docBase;
this.score = score;
}
@@ -64,7 +81,14 @@ final class MaxScoreAccumulator {
public int compareTo(DocAndScore o) {
int cmp = Float.compare(score, o.score);
if (cmp == 0) {
- return Integer.compare(docID, o.docID);
+ // tie-break on the minimum doc base
+ // For a given minimum competitive score, we want to know the first segment
+ // where this score occurred, hence the reverse order here.
+ // On segments with a lower docBase, any document whose score is greater
+ // than or equal to this score would be competitive, while on segments with a
+ // higher docBase, documents need to have a strictly greater score to be
+ // competitive since we tie break on doc ID.
+ return Integer.compare(o.docBase, docBase);
}
return cmp;
}
@@ -74,17 +98,17 @@ final class MaxScoreAccumulator {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
DocAndScore result = (DocAndScore) o;
- return docID == result.docID && Float.compare(result.score, score) == 0;
+ return docBase == result.docBase && Float.compare(result.score, score) == 0;
}
@Override
public int hashCode() {
- return Objects.hash(docID, score);
+ return Objects.hash(docBase, score);
}
@Override
public String toString() {
- return "DocAndScore{" + "docID=" + docID + ", score=" + score + '}';
+ return "DocAndScore{" + "docBase=" + docBase + ", score=" + score + '}';
}
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
index db97704..0023d71 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
@@ -134,9 +134,9 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
public void setScorer(Scorable scorer) throws IOException {
this.scorer = scorer;
comparator.setScorer(scorer);
- minCompetitiveScore = 0f;
- updateMinCompetitiveScore(scorer);
- if (minScoreAcc != null) {
+ if (minScoreAcc == null) {
+ updateMinCompetitiveScore(scorer);
+ } else {
updateGlobalMinCompetitiveScore(scorer);
}
}
@@ -191,6 +191,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
+ // reset the minimum competitive score
+ minCompetitiveScore = 0f;
docBase = context.docBase;
return new TopFieldLeafCollector(queue, sort, context) {
@@ -244,6 +246,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
+ // reset the minimum competitive score
+ minCompetitiveScore = 0f;
docBase = context.docBase;
final int afterDoc = after.doc - docBase;
@@ -363,7 +367,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
minCompetitiveScore = minScore;
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
if (minScoreAcc != null) {
- minScoreAcc.accumulate(bottom.doc, minScore);
+ minScoreAcc.accumulate(docBase, minScore);
}
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
index 0f7b526..0c4ccdc 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
@@ -55,14 +55,15 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
// reset the minimum competitive score
docBase = context.docBase;
- return new ScorerLeafCollector() {
+ minCompetitiveScore = 0f;
+ return new ScorerLeafCollector() {
@Override
public void setScorer(Scorable scorer) throws IOException {
super.setScorer(scorer);
- minCompetitiveScore = 0f;
- updateMinCompetitiveScore(scorer);
- if (minScoreAcc != null) {
+ if (minScoreAcc == null) {
+ updateMinCompetitiveScore(scorer);
+ } else {
updateGlobalMinCompetitiveScore(scorer);
}
}
@@ -132,9 +133,20 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
docBase = context.docBase;
final int afterDoc = after.doc - context.docBase;
+ minCompetitiveScore = 0f;
return new ScorerLeafCollector() {
@Override
+ public void setScorer(Scorable scorer) throws IOException {
+ super.setScorer(scorer);
+ if (minScoreAcc == null) {
+ updateMinCompetitiveScore(scorer);
+ } else {
+ updateGlobalMinCompetitiveScore(scorer);
+ }
+ }
+
+ @Override
public void collect(int doc) throws IOException {
float score = scorer.score();
@@ -307,7 +319,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
// the next float if the global minimum score is set on a document id that is
// smaller than the ids in the current leaf
float score =
- docBase > maxMinScore.docID ? Math.nextUp(maxMinScore.score) : maxMinScore.score;
+ docBase >= maxMinScore.docBase ? Math.nextUp(maxMinScore.score) : maxMinScore.score;
if (score > minCompetitiveScore) {
assert hitsThresholdChecker.isThresholdReached();
scorer.setMinCompetitiveScore(score);
@@ -332,7 +344,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
// we don't use the next float but we register the document
// id so that other leaves can require it if they are after
// the current maximum
- minScoreAcc.accumulate(pqTop.doc, pqTop.score);
+ minScoreAcc.accumulate(docBase, pqTop.score);
}
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java
index d53a2a2..28bec9c 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java
@@ -23,21 +23,29 @@ public class TestMaxScoreAccumulator extends LuceneTestCase {
public void testSimple() {
MaxScoreAccumulator acc = new MaxScoreAccumulator();
acc.accumulate(0, 0f);
+ assertEquals(0f, acc.get().score, 0);
+ assertEquals(0, acc.get().docBase, 0);
acc.accumulate(10, 0f);
assertEquals(0f, acc.get().score, 0);
- assertEquals(10, acc.get().docID, 0);
+ assertEquals(0, acc.get().docBase, 0);
acc.accumulate(100, 1000f);
assertEquals(1000f, acc.get().score, 0);
- assertEquals(100, acc.get().docID, 0);
+ assertEquals(100, acc.get().docBase, 0);
acc.accumulate(1000, 5f);
assertEquals(1000f, acc.get().score, 0);
- assertEquals(100, acc.get().docID, 0);
+ assertEquals(100, acc.get().docBase, 0);
acc.accumulate(99, 1000f);
assertEquals(1000f, acc.get().score, 0);
- assertEquals(100, acc.get().docID, 0);
- acc.accumulate(0, 1001f);
+ assertEquals(99, acc.get().docBase, 0);
+ acc.accumulate(1000, 1001f);
+ assertEquals(1001f, acc.get().score, 0);
+ assertEquals(1000, acc.get().docBase, 0);
+ acc.accumulate(10, 1001f);
+ assertEquals(1001f, acc.get().score, 0);
+ assertEquals(10, acc.get().docBase, 0);
+ acc.accumulate(100, 1001f);
assertEquals(1001f, acc.get().score, 0);
- assertEquals(0, acc.get().docID, 0);
+ assertEquals(10, acc.get().docBase, 0);
}
public void testRandom() {
@@ -48,7 +56,7 @@ public class TestMaxScoreAccumulator extends LuceneTestCase {
for (int i = 0; i < numDocs; i++) {
MaxScoreAccumulator.DocAndScore res =
new MaxScoreAccumulator.DocAndScore(random().nextInt(maxDocs), random().nextFloat());
- acc.accumulate(res.docID, res.score);
+ acc.accumulate(res.docBase, res.score);
if (res.compareTo(max) > 0) {
max = res;
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java
index 35b5dbd..ceecb82 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java
@@ -18,10 +18,6 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
@@ -41,7 +37,6 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.NamedThreadFactory;
public class TestTopDocsCollector extends LuceneTestCase {
@@ -141,7 +136,7 @@ public class TestTopDocsCollector extends LuceneTestCase {
private TopDocsCollector<ScoreDoc> doSearchWithThreshold(
int numResults, int thresHold, Query q, IndexReader indexReader) throws IOException {
- IndexSearcher searcher = new IndexSearcher(indexReader);
+ IndexSearcher searcher = newSearcher(indexReader, true, true, false);
TopDocsCollector<ScoreDoc> tdc = TopScoreDocCollector.create(numResults, thresHold);
searcher.search(q, tdc);
return tdc;
@@ -149,24 +144,10 @@ public class TestTopDocsCollector extends LuceneTestCase {
private TopDocs doConcurrentSearchWithThreshold(
int numResults, int threshold, Query q, IndexReader indexReader) throws IOException {
- ExecutorService service =
- new ThreadPoolExecutor(
- 4,
- 4,
- 0L,
- TimeUnit.MILLISECONDS,
- new LinkedBlockingQueue<Runnable>(),
- new NamedThreadFactory("TestTopDocsCollector"));
- try {
- IndexSearcher searcher = new IndexSearcher(indexReader, service);
-
- CollectorManager<TopScoreDocCollector, TopDocs> collectorManager =
- TopScoreDocCollector.createSharedManager(numResults, null, threshold);
-
- return searcher.search(q, collectorManager);
- } finally {
- service.shutdown();
- }
+ IndexSearcher searcher = newSearcher(indexReader, true, true, true);
+ CollectorManager<TopScoreDocCollector, TopDocs> collectorManager =
+ TopScoreDocCollector.createSharedManager(numResults, null, threshold);
+ return searcher.search(q, collectorManager);
}
@Override
@@ -303,8 +284,9 @@ public class TestTopDocsCollector extends LuceneTestCase {
Float minCompetitiveScore = null;
@Override
- public void setMinCompetitiveScore(float minCompetitiveScore) {
- this.minCompetitiveScore = minCompetitiveScore;
+ public void setMinCompetitiveScore(float score) {
+ assert minCompetitiveScore == null || score >= minCompetitiveScore;
+ this.minCompetitiveScore = score;
}
@Override
@@ -356,9 +338,9 @@ public class TestTopDocsCollector extends LuceneTestCase {
scorer.doc = 3;
scorer.score = 0.5f;
// Make sure we do not call setMinCompetitiveScore for non-competitive hits
- scorer.minCompetitiveScore = Float.NaN;
+ scorer.minCompetitiveScore = null;
leafCollector.collect(3);
- assertTrue(Float.isNaN(scorer.minCompetitiveScore));
+ assertNull(scorer.minCompetitiveScore);
scorer.doc = 4;
scorer.score = 4;
@@ -613,6 +595,10 @@ public class TestTopDocsCollector extends LuceneTestCase {
assertEquals(11, topDocs.totalHits.value);
assertEquals(new TotalHits(11, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), topDocs.totalHits);
+ leafCollector.setScorer(scorer);
+ leafCollector2.setScorer(scorer2);
+ leafCollector3.setScorer(scorer3);
+
reader.close();
dir.close();
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
index 8419caa..9d99f87 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
@@ -21,10 +21,6 @@ import static org.apache.lucene.search.SortField.FIELD_SCORE;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
@@ -42,7 +38,6 @@ import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.FieldValueHitQueue.Entry;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.NamedThreadFactory;
import org.apache.lucene.util.TestUtil;
public class TestTopFieldCollector extends LuceneTestCase {
@@ -75,7 +70,7 @@ public class TestTopFieldCollector extends LuceneTestCase {
private TopFieldCollector doSearchWithThreshold(
int numResults, int thresHold, Query q, Sort sort, IndexReader indexReader)
throws IOException {
- IndexSearcher searcher = new IndexSearcher(indexReader);
+ IndexSearcher searcher = newSearcher(indexReader);
TopFieldCollector tdc = TopFieldCollector.create(sort, numResults, thresHold);
searcher.search(q, tdc);
return tdc;
@@ -84,26 +79,14 @@ public class TestTopFieldCollector extends LuceneTestCase {
private TopDocs doConcurrentSearchWithThreshold(
int numResults, int threshold, Query q, Sort sort, IndexReader indexReader)
throws IOException {
- ExecutorService service =
- new ThreadPoolExecutor(
- 4,
- 4,
- 0L,
- TimeUnit.MILLISECONDS,
- new LinkedBlockingQueue<Runnable>(),
- new NamedThreadFactory("TestTopDocsCollector"));
- try {
- IndexSearcher searcher = new IndexSearcher(indexReader, service);
-
- CollectorManager<TopFieldCollector, TopFieldDocs> collectorManager =
- TopFieldCollector.createSharedManager(sort, numResults, null, threshold);
-
- TopDocs tdc = searcher.search(q, collectorManager);
-
- return tdc;
- } finally {
- service.shutdown();
- }
+ IndexSearcher searcher = newSearcher(indexReader, true, true, true);
+
+ CollectorManager<TopFieldCollector, TopFieldDocs> collectorManager =
+ TopFieldCollector.createSharedManager(sort, numResults, null, threshold);
+
+ TopDocs tdc = searcher.search(q, collectorManager);
+
+ return tdc;
}
public void testSortWithoutFillFields() throws Exception {
@@ -146,17 +129,7 @@ public class TestTopFieldCollector extends LuceneTestCase {
}
public void testSharedHitcountCollector() throws Exception {
-
- ExecutorService service =
- new ThreadPoolExecutor(
- 4,
- 4,
- 0L,
- TimeUnit.MILLISECONDS,
- new LinkedBlockingQueue<Runnable>(),
- new NamedThreadFactory("TestTopFieldCollector"));
-
- IndexSearcher concurrentSearcher = new IndexSearcher(ir, service);
+ IndexSearcher concurrentSearcher = newSearcher(ir, true, true, true);
// Two Sort criteria to instantiate the multi/single comparators.
Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort()};
@@ -178,8 +151,6 @@ public class TestTopFieldCollector extends LuceneTestCase {
CheckHits.checkEqual(q, td.scoreDocs, td2.scoreDocs);
}
-
- service.shutdown();
}
public void testSortWithoutTotalHitTracking() throws Exception {
@@ -678,6 +649,10 @@ public class TestTopFieldCollector extends LuceneTestCase {
assertEquals(11, topDocs.totalHits.value);
assertEquals(new TotalHits(11, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), topDocs.totalHits);
+ leafCollector.setScorer(scorer);
+ leafCollector2.setScorer(scorer2);
+ leafCollector3.setScorer(scorer3);
+
reader.close();
dir.close();
}