You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by tf...@apache.org on 2020/04/23 20:27:11 UTC

[lucene-solr] branch branch_8x updated: LUCENE-9342: Collector's totalHitsThreshold should not be lower than numHits (#1448)

This is an automated email from the ASF dual-hosted git repository.

tflobbe pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new edd00d9  LUCENE-9342: Collector's totalHitsThreshold should not be lower than numHits (#1448)
edd00d9 is described below

commit edd00d933f6144293d74fc727fec6190f28c57a0
Author: Tomas Fernandez Lobbe <tf...@apache.org>
AuthorDate: Thu Apr 23 12:04:02 2020 -0700

    LUCENE-9342: Collector's totalHitsThreshold should not be lower than numHits (#1448)
    
    Use the maximum of the two so that the relation is EQUAL_TO when the number of hits for a query is less than the collector's numHits.
---
 lucene/CHANGES.txt                                 |  3 ++
 .../org/apache/lucene/search/IndexSearcher.java    |  8 +--
 .../apache/lucene/search/TopFieldCollector.java    |  4 +-
 .../apache/lucene/search/TopScoreDocCollector.java |  4 +-
 .../apache/lucene/search/TestTopDocsCollector.java | 59 ++++++++++++++++++----
 .../lucene/search/TestTopFieldCollector.java       | 58 +++++++++++++++++----
 6 files changed, 106 insertions(+), 30 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 331ce67..4ebed10 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -37,6 +37,9 @@ Improvements
 * LUCENE-9324: Add an ID to SegmentCommitInfo in order to compare commits for equality and make
   snapshots incremental on generational files. (Simon Willnauer, Mike Mccandless, Adrien Grant)
 
+* LUCENE-9342: TotalHits' relation will be EQUAL_TO when the number of hits is lower than TopDocsCollector's numHits
+  (Tomás Fernández Löbbe)
+
 Optimizations
 ---------------------
 
diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
index 41957d1..e993832 100644
--- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -396,8 +396,8 @@ public class IndexSearcher {
 
     final CollectorManager<TopScoreDocCollector, TopDocs> manager = new CollectorManager<TopScoreDocCollector, TopDocs>() {
 
-      private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(TOTAL_HITS_THRESHOLD) :
-          HitsThresholdChecker.createShared(TOTAL_HITS_THRESHOLD);
+      private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) :
+          HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits));
 
       private final MaxScoreAccumulator minScoreAcc = (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator();
 
@@ -529,8 +529,8 @@ public class IndexSearcher {
 
     final CollectorManager<TopFieldCollector, TopFieldDocs> manager = new CollectorManager<TopFieldCollector, TopFieldDocs>() {
 
-      private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(TOTAL_HITS_THRESHOLD) :
-          HitsThresholdChecker.createShared(TOTAL_HITS_THRESHOLD);
+      private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) :
+          HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits));
 
       private final MaxScoreAccumulator minScoreAcc = (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator();
 
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
index 699c5a7..6a71279 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
@@ -447,7 +447,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
       throw new IllegalArgumentException("totalHitsThreshold must be >= 0, got " + totalHitsThreshold);
     }
 
-    return create(sort, numHits, after, HitsThresholdChecker.create(totalHitsThreshold), null /* bottomValueChecker */);
+    return create(sort, numHits, after, HitsThresholdChecker.create(Math.max(totalHitsThreshold, numHits)), null /* bottomValueChecker */);
   }
 
   /**
@@ -494,7 +494,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
                                                                                  int totalHitsThreshold) {
     return new CollectorManager<TopFieldCollector, TopFieldDocs>() {
 
-      private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(totalHitsThreshold);
+      private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(Math.max(totalHitsThreshold, numHits));
       private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
 
       @Override
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
index bdad42e..adbbafc 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
@@ -217,7 +217,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
    * objects.
    */
   public static TopScoreDocCollector create(int numHits, ScoreDoc after, int totalHitsThreshold) {
-    return create(numHits, after, HitsThresholdChecker.create(totalHitsThreshold), null);
+    return create(numHits, after, HitsThresholdChecker.create(Math.max(totalHitsThreshold, numHits)), null);
   }
 
   static TopScoreDocCollector create(int numHits, ScoreDoc after, HitsThresholdChecker hitsThresholdChecker,
@@ -246,7 +246,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
                                                                                       int totalHitsThreshold) {
     return new CollectorManager<TopScoreDocCollector, TopDocs>() {
 
-      private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(totalHitsThreshold);
+      private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(Math.max(totalHitsThreshold, numHits));
       private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
 
       @Override
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java
index e0a2b6c..544094a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java
@@ -27,6 +27,8 @@ import java.util.concurrent.TimeUnit;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
@@ -261,7 +263,7 @@ public class TestTopDocsCollector extends LuceneTestCase {
     assertEquals(2, reader.leaves().size());
     w.close();
 
-    TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 1);
+    TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 2);
     ScoreAndDoc scorer = new ScoreAndDoc();
 
     LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0));
@@ -276,35 +278,40 @@ public class TestTopDocsCollector extends LuceneTestCase {
     scorer.doc = 1;
     scorer.score = 2;
     leafCollector.collect(1);
-    assertEquals(Math.nextUp(1f), scorer.minCompetitiveScore, 0f);
-
+    assertNull(scorer.minCompetitiveScore);
+    
     scorer.doc = 2;
+    scorer.score = 3;
+    leafCollector.collect(2);
+    assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
+
+    scorer.doc = 3;
     scorer.score = 0.5f;
     // Make sure we do not call setMinCompetitiveScore for non-competitive hits
     scorer.minCompetitiveScore = Float.NaN;
-    leafCollector.collect(2);
+    leafCollector.collect(3);
     assertTrue(Float.isNaN(scorer.minCompetitiveScore));
 
-    scorer.doc = 3;
+    scorer.doc = 4;
     scorer.score = 4;
-    leafCollector.collect(3);
-    assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
+    leafCollector.collect(4);
+    assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f);
 
     // Make sure the min score is set on scorers on new segments
     scorer = new ScoreAndDoc();
     leafCollector = collector.getLeafCollector(reader.leaves().get(1));
     leafCollector.setScorer(scorer);
-    assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
+    assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f);
 
     scorer.doc = 0;
     scorer.score = 1;
     leafCollector.collect(0);
-    assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
+    assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f);
 
     scorer.doc = 1;
-    scorer.score = 3;
+    scorer.score = 4;
     leafCollector.collect(1);
-    assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f);
+    assertEquals(Math.nextUp(4f), scorer.minCompetitiveScore, 0f);
 
     reader.close();
     dir.close();
@@ -380,6 +387,36 @@ public class TestTopDocsCollector extends LuceneTestCase {
     reader.close();
     dir.close();
   }
+  
+  public void testRelationVsTopDocsCount() throws Exception {
+    try (Directory dir = newDirectory();
+        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) {
+      Document doc = new Document();
+      doc.add(new TextField("f", "foo bar", Store.NO));
+      w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
+      w.flush();
+      w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
+      w.flush();
+      
+      try (IndexReader reader = DirectoryReader.open(w)) {
+        IndexSearcher searcher = new IndexSearcher(reader);
+        TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 10);
+        searcher.search(new TermQuery(new Term("f", "foo")), collector);
+        assertEquals(10, collector.totalHits);
+        assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
+        
+        collector = TopScoreDocCollector.create(2, null, 2);
+        searcher.search(new TermQuery(new Term("f", "foo")), collector);
+        assertTrue(10 >= collector.totalHits);
+        assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, collector.totalHitsRelation);
+        
+        collector = TopScoreDocCollector.create(10, null, 2);
+        searcher.search(new TermQuery(new Term("f", "foo")), collector);
+        assertEquals(10, collector.totalHits);
+        assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
+      }
+    }
+  }
 
   public void testConcurrentMinScore() throws Exception {
     Directory dir = newDirectory();
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
index ed814664..7759891 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
@@ -294,7 +294,7 @@ public class TestTopFieldCollector extends LuceneTestCase {
     w.close();
 
     Sort sort = new Sort(FIELD_SCORE, new SortField("foo", SortField.Type.LONG));
-    TopFieldCollector collector = TopFieldCollector.create(sort, 2, null, 1);
+    TopFieldCollector collector = TopFieldCollector.create(sort, 2, null, 2);
     ScoreAndDoc scorer = new ScoreAndDoc();
 
     LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0));
@@ -309,35 +309,40 @@ public class TestTopFieldCollector extends LuceneTestCase {
     scorer.doc = 1;
     scorer.score = 2;
     leafCollector.collect(1);
-    assertEquals(1f, scorer.minCompetitiveScore, 0f);
-
+    assertNull(scorer.minCompetitiveScore);
+    
     scorer.doc = 2;
+    scorer.score = 3;
+    leafCollector.collect(2);
+    assertEquals(2f, scorer.minCompetitiveScore, 0f);
+
+    scorer.doc = 3;
     scorer.score = 0.5f;
     // Make sure we do not call setMinCompetitiveScore for non-competitive hits
     scorer.minCompetitiveScore = Float.NaN;
-    leafCollector.collect(2);
+    leafCollector.collect(3);
     assertTrue(Float.isNaN(scorer.minCompetitiveScore));
 
-    scorer.doc = 3;
+    scorer.doc = 4;
     scorer.score = 4;
-    leafCollector.collect(3);
-    assertEquals(2f, scorer.minCompetitiveScore, 0f);
+    leafCollector.collect(4);
+    assertEquals(3f, scorer.minCompetitiveScore, 0f);
 
     // Make sure the min score is set on scorers on new segments
     scorer = new ScoreAndDoc();
     leafCollector = collector.getLeafCollector(reader.leaves().get(1));
     leafCollector.setScorer(scorer);
-    assertEquals(2f, scorer.minCompetitiveScore, 0f);
+    assertEquals(3f, scorer.minCompetitiveScore, 0f);
 
     scorer.doc = 0;
     scorer.score = 1;
     leafCollector.collect(0);
-    assertEquals(2f, scorer.minCompetitiveScore, 0f);
+    assertEquals(3f, scorer.minCompetitiveScore, 0f);
 
     scorer.doc = 1;
-    scorer.score = 3;
+    scorer.score = 4;
     leafCollector.collect(1);
-    assertEquals(3f, scorer.minCompetitiveScore, 0f);
+    assertEquals(4f, scorer.minCompetitiveScore, 0f);
 
     reader.close();
     dir.close();
@@ -690,5 +695,36 @@ public class TestTopFieldCollector extends LuceneTestCase {
     indexReader.close();
     dir.close();
   }
+  
+  public void testRelationVsTopDocsCount() throws Exception {
+    Sort sort = new Sort(SortField.FIELD_SCORE, SortField.FIELD_DOC);
+    try (Directory dir = newDirectory();
+        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) {
+      Document doc = new Document();
+      doc.add(new TextField("f", "foo bar", Store.NO));
+      w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
+      w.flush();
+      w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
+      w.flush();
+      
+      try (IndexReader reader = DirectoryReader.open(w)) {
+        IndexSearcher searcher = new IndexSearcher(reader);
+        TopFieldCollector collector = TopFieldCollector.create(sort, 2, 10);
+        searcher.search(new TermQuery(new Term("f", "foo")), collector);
+        assertEquals(10, collector.totalHits);
+        assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
+        
+        collector = TopFieldCollector.create(sort, 2, 2);
+        searcher.search(new TermQuery(new Term("f", "foo")), collector);
+        assertTrue(10 >= collector.totalHits);
+        assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, collector.totalHitsRelation);
+        
+        collector = TopFieldCollector.create(sort, 10, 2);
+        searcher.search(new TermQuery(new Term("f", "foo")), collector);
+        assertEquals(10, collector.totalHits);
+        assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
+      }
+    }
+  }
 
 }