You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by tf...@apache.org on 2020/04/23 20:27:11 UTC
[lucene-solr] branch branch_8x updated: LUCENE-9342: Collector's
totalHitsThreshold should not be lower than numHits (#1448)
This is an automated email from the ASF dual-hosted git repository.
tflobbe pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new edd00d9 LUCENE-9342: Collector's totalHitsThreshold should not be lower than numHits (#1448)
edd00d9 is described below
commit edd00d933f6144293d74fc727fec6190f28c57a0
Author: Tomas Fernandez Lobbe <tf...@apache.org>
AuthorDate: Thu Apr 23 12:04:02 2020 -0700
LUCENE-9342: Collector's totalHitsThreshold should not be lower than numHits (#1448)
Use the maximum of the two, so that the relation is EQUAL_TO when the number of hits in a query is less than the collector's numHits.
---
lucene/CHANGES.txt | 3 ++
.../org/apache/lucene/search/IndexSearcher.java | 8 +--
.../apache/lucene/search/TopFieldCollector.java | 4 +-
.../apache/lucene/search/TopScoreDocCollector.java | 4 +-
.../apache/lucene/search/TestTopDocsCollector.java | 59 ++++++++++++++++++----
.../lucene/search/TestTopFieldCollector.java | 58 +++++++++++++++++----
6 files changed, 106 insertions(+), 30 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 331ce67..4ebed10 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -37,6 +37,9 @@ Improvements
* LUCENE-9324: Add an ID to SegmentCommitInfo in order to compare commits for equality and make
snapshots incremental on generational files. (Simon Willnauer, Mike McCandless, Adrien Grand)
+* LUCENE-9342: TotalHits' relation will be EQUAL_TO when the number of hits is lower than TopDocsCollector's numHits
+ (Tomás Fernández Löbbe)
+
Optimizations
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
index 41957d1..e993832 100644
--- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -396,8 +396,8 @@ public class IndexSearcher {
final CollectorManager<TopScoreDocCollector, TopDocs> manager = new CollectorManager<TopScoreDocCollector, TopDocs>() {
- private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(TOTAL_HITS_THRESHOLD) :
- HitsThresholdChecker.createShared(TOTAL_HITS_THRESHOLD);
+ private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) :
+ HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits));
private final MaxScoreAccumulator minScoreAcc = (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator();
@@ -529,8 +529,8 @@ public class IndexSearcher {
final CollectorManager<TopFieldCollector, TopFieldDocs> manager = new CollectorManager<TopFieldCollector, TopFieldDocs>() {
- private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(TOTAL_HITS_THRESHOLD) :
- HitsThresholdChecker.createShared(TOTAL_HITS_THRESHOLD);
+ private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) :
+ HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits));
private final MaxScoreAccumulator minScoreAcc = (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator();
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
index 699c5a7..6a71279 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
@@ -447,7 +447,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
throw new IllegalArgumentException("totalHitsThreshold must be >= 0, got " + totalHitsThreshold);
}
- return create(sort, numHits, after, HitsThresholdChecker.create(totalHitsThreshold), null /* bottomValueChecker */);
+ return create(sort, numHits, after, HitsThresholdChecker.create(Math.max(totalHitsThreshold, numHits)), null /* bottomValueChecker */);
}
/**
@@ -494,7 +494,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
int totalHitsThreshold) {
return new CollectorManager<TopFieldCollector, TopFieldDocs>() {
- private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(totalHitsThreshold);
+ private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(Math.max(totalHitsThreshold, numHits));
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
index bdad42e..adbbafc 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
@@ -217,7 +217,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
* objects.
*/
public static TopScoreDocCollector create(int numHits, ScoreDoc after, int totalHitsThreshold) {
- return create(numHits, after, HitsThresholdChecker.create(totalHitsThreshold), null);
+ return create(numHits, after, HitsThresholdChecker.create(Math.max(totalHitsThreshold, numHits)), null);
}
static TopScoreDocCollector create(int numHits, ScoreDoc after, HitsThresholdChecker hitsThresholdChecker,
@@ -246,7 +246,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
int totalHitsThreshold) {
return new CollectorManager<TopScoreDocCollector, TopDocs>() {
- private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(totalHitsThreshold);
+ private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(Math.max(totalHitsThreshold, numHits));
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
@Override
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java
index e0a2b6c..544094a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java
@@ -27,6 +27,8 @@ import java.util.concurrent.TimeUnit;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
@@ -261,7 +263,7 @@ public class TestTopDocsCollector extends LuceneTestCase {
assertEquals(2, reader.leaves().size());
w.close();
- TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 1);
+ TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 2);
ScoreAndDoc scorer = new ScoreAndDoc();
LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0));
@@ -276,35 +278,40 @@ public class TestTopDocsCollector extends LuceneTestCase {
scorer.doc = 1;
scorer.score = 2;
leafCollector.collect(1);
- assertEquals(Math.nextUp(1f), scorer.minCompetitiveScore, 0f);
-
+ assertNull(scorer.minCompetitiveScore);
+
scorer.doc = 2;
+ scorer.score = 3;
+ leafCollector.collect(2);
+ assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
+
+ scorer.doc = 3;
scorer.score = 0.5f;
// Make sure we do not call setMinCompetitiveScore for non-competitive hits
scorer.minCompetitiveScore = Float.NaN;
- leafCollector.collect(2);
+ leafCollector.collect(3);
assertTrue(Float.isNaN(scorer.minCompetitiveScore));
- scorer.doc = 3;
+ scorer.doc = 4;
scorer.score = 4;
- leafCollector.collect(3);
- assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
+ leafCollector.collect(4);
+ assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f);
// Make sure the min score is set on scorers on new segments
scorer = new ScoreAndDoc();
leafCollector = collector.getLeafCollector(reader.leaves().get(1));
leafCollector.setScorer(scorer);
- assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
+ assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f);
scorer.doc = 0;
scorer.score = 1;
leafCollector.collect(0);
- assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
+ assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f);
scorer.doc = 1;
- scorer.score = 3;
+ scorer.score = 4;
leafCollector.collect(1);
- assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f);
+ assertEquals(Math.nextUp(4f), scorer.minCompetitiveScore, 0f);
reader.close();
dir.close();
@@ -380,6 +387,36 @@ public class TestTopDocsCollector extends LuceneTestCase {
reader.close();
dir.close();
}
+
+ public void testRelationVsTopDocsCount() throws Exception {
+ try (Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) {
+ Document doc = new Document();
+ doc.add(new TextField("f", "foo bar", Store.NO));
+ w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
+ w.flush();
+ w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
+ w.flush();
+
+ try (IndexReader reader = DirectoryReader.open(w)) {
+ IndexSearcher searcher = new IndexSearcher(reader);
+ TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 10);
+ searcher.search(new TermQuery(new Term("f", "foo")), collector);
+ assertEquals(10, collector.totalHits);
+ assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
+
+ collector = TopScoreDocCollector.create(2, null, 2);
+ searcher.search(new TermQuery(new Term("f", "foo")), collector);
+ assertTrue(10 >= collector.totalHits);
+ assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, collector.totalHitsRelation);
+
+ collector = TopScoreDocCollector.create(10, null, 2);
+ searcher.search(new TermQuery(new Term("f", "foo")), collector);
+ assertEquals(10, collector.totalHits);
+ assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
+ }
+ }
+ }
public void testConcurrentMinScore() throws Exception {
Directory dir = newDirectory();
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
index ed814664..7759891 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
@@ -294,7 +294,7 @@ public class TestTopFieldCollector extends LuceneTestCase {
w.close();
Sort sort = new Sort(FIELD_SCORE, new SortField("foo", SortField.Type.LONG));
- TopFieldCollector collector = TopFieldCollector.create(sort, 2, null, 1);
+ TopFieldCollector collector = TopFieldCollector.create(sort, 2, null, 2);
ScoreAndDoc scorer = new ScoreAndDoc();
LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0));
@@ -309,35 +309,40 @@ public class TestTopFieldCollector extends LuceneTestCase {
scorer.doc = 1;
scorer.score = 2;
leafCollector.collect(1);
- assertEquals(1f, scorer.minCompetitiveScore, 0f);
-
+ assertNull(scorer.minCompetitiveScore);
+
scorer.doc = 2;
+ scorer.score = 3;
+ leafCollector.collect(2);
+ assertEquals(2f, scorer.minCompetitiveScore, 0f);
+
+ scorer.doc = 3;
scorer.score = 0.5f;
// Make sure we do not call setMinCompetitiveScore for non-competitive hits
scorer.minCompetitiveScore = Float.NaN;
- leafCollector.collect(2);
+ leafCollector.collect(3);
assertTrue(Float.isNaN(scorer.minCompetitiveScore));
- scorer.doc = 3;
+ scorer.doc = 4;
scorer.score = 4;
- leafCollector.collect(3);
- assertEquals(2f, scorer.minCompetitiveScore, 0f);
+ leafCollector.collect(4);
+ assertEquals(3f, scorer.minCompetitiveScore, 0f);
// Make sure the min score is set on scorers on new segments
scorer = new ScoreAndDoc();
leafCollector = collector.getLeafCollector(reader.leaves().get(1));
leafCollector.setScorer(scorer);
- assertEquals(2f, scorer.minCompetitiveScore, 0f);
+ assertEquals(3f, scorer.minCompetitiveScore, 0f);
scorer.doc = 0;
scorer.score = 1;
leafCollector.collect(0);
- assertEquals(2f, scorer.minCompetitiveScore, 0f);
+ assertEquals(3f, scorer.minCompetitiveScore, 0f);
scorer.doc = 1;
- scorer.score = 3;
+ scorer.score = 4;
leafCollector.collect(1);
- assertEquals(3f, scorer.minCompetitiveScore, 0f);
+ assertEquals(4f, scorer.minCompetitiveScore, 0f);
reader.close();
dir.close();
@@ -690,5 +695,36 @@ public class TestTopFieldCollector extends LuceneTestCase {
indexReader.close();
dir.close();
}
+
+ public void testRelationVsTopDocsCount() throws Exception {
+ Sort sort = new Sort(SortField.FIELD_SCORE, SortField.FIELD_DOC);
+ try (Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) {
+ Document doc = new Document();
+ doc.add(new TextField("f", "foo bar", Store.NO));
+ w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
+ w.flush();
+ w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
+ w.flush();
+
+ try (IndexReader reader = DirectoryReader.open(w)) {
+ IndexSearcher searcher = new IndexSearcher(reader);
+ TopFieldCollector collector = TopFieldCollector.create(sort, 2, 10);
+ searcher.search(new TermQuery(new Term("f", "foo")), collector);
+ assertEquals(10, collector.totalHits);
+ assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
+
+ collector = TopFieldCollector.create(sort, 2, 2);
+ searcher.search(new TermQuery(new Term("f", "foo")), collector);
+ assertTrue(10 >= collector.totalHits);
+ assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, collector.totalHitsRelation);
+
+ collector = TopFieldCollector.create(sort, 10, 2);
+ searcher.search(new TermQuery(new Term("f", "foo")), collector);
+ assertEquals(10, collector.totalHits);
+ assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
+ }
+ }
+ }
}